From cd3c88ad7e9b5ae315e14784db256733ab43431b Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Sun, 24 Mar 2024 14:26:41 +1000
Subject: [PATCH] deps: Remove now-unused vendored libs

---
 dep/CMakeLists.txt                            |    8 -
 dep/libjpeg/CMakeLists.txt                    |   65 -
 dep/libjpeg/README                            |  374 -
 dep/libjpeg/change.log                        |  527 --
 dep/libjpeg/include/jconfig.h                 |  171 -
 dep/libjpeg/include/jerror.h                  |  304 -
 dep/libjpeg/include/jmorecfg.h                |  457 --
 dep/libjpeg/include/jpegint.h                 |  445 --
 dep/libjpeg/include/jpeglib.h                 | 1183 ---
 dep/libjpeg/libjpeg.vcxproj                   |   76 -
 dep/libjpeg/libjpeg.vcxproj.filters           |   52 -
 dep/libjpeg/src/jaricom.c                     |  153 -
 dep/libjpeg/src/jcapimin.c                    |  288 -
 dep/libjpeg/src/jcapistd.c                    |  162 -
 dep/libjpeg/src/jcarith.c                     |  945 ---
 dep/libjpeg/src/jccoefct.c                    |  456 --
 dep/libjpeg/src/jccolor.c                     |  598 --
 dep/libjpeg/src/jcdctmgr.c                    |  466 --
 dep/libjpeg/src/jchuff.c                      | 1656 -----
 dep/libjpeg/src/jcinit.c                      |  249 -
 dep/libjpeg/src/jcmainct.c                    |  297 -
 dep/libjpeg/src/jcmarker.c                    |  717 --
 dep/libjpeg/src/jcmaster.c                    |  675 --
 dep/libjpeg/src/jcomapi.c                     |  244 -
 dep/libjpeg/src/jcparam.c                     |  591 --
 dep/libjpeg/src/jcprepct.c                    |  358 -
 dep/libjpeg/src/jcsample.c                    |  545 --
 dep/libjpeg/src/jctrans.c                     |  399 --
 dep/libjpeg/src/jdapimin.c                    |  412 --
 dep/libjpeg/src/jdapistd.c                    |  276 -
 dep/libjpeg/src/jdarith.c                     |  796 ---
 dep/libjpeg/src/jdatadst.c                    |  263 -
 dep/libjpeg/src/jdatasrc.c                    |  271 -
 dep/libjpeg/src/jdcoefct.c                    |  744 --
 dep/libjpeg/src/jdcolor.c                     |  769 --
 dep/libjpeg/src/jdct.h                        |  409 --
 dep/libjpeg/src/jddctmgr.c                    |  384 -
 dep/libjpeg/src/jdhuff.c                      | 1559 ----
 dep/libjpeg/src/jdinput.c                     |  657 --
 dep/libjpeg/src/jdmainct.c                    |  511 --
 dep/libjpeg/src/jdmarker.c                    | 1505 ----
 dep/libjpeg/src/jdmaster.c                    |  532 --
 dep/libjpeg/src/jdmerge.c                     |  437 --
 dep/libjpeg/src/jdpostct.c                    |  290 -
 dep/libjpeg/src/jdsample.c                    |  341 -
 dep/libjpeg/src/jdtrans.c                     |  140 -
 dep/libjpeg/src/jerror.c                      |  253 -
 dep/libjpeg/src/jfdctflt.c                    |  176 -
 dep/libjpeg/src/jfdctfst.c                    |  232 -
 dep/libjpeg/src/jfdctint.c                    | 4415 ------------
 dep/libjpeg/src/jidctflt.c                    |  238 -
 dep/libjpeg/src/jidctfst.c                    |  351 -
 dep/libjpeg/src/jidctint.c                    | 5240 --------------
 dep/libjpeg/src/jinclude.h                    |  157 -
 dep/libjpeg/src/jmemmgr.c                     | 1115 ---
 dep/libjpeg/src/jmemnobs.c                    |  113 -
 dep/libjpeg/src/jmemsys.h                     |  198 -
 dep/libjpeg/src/jquant1.c                     |  851 ---
 dep/libjpeg/src/jquant2.c                     | 1311 ----
 dep/libjpeg/src/jutils.c                      |  224 -
 dep/libjpeg/src/jversion.h                    |   14 -
 dep/libjpeg/src/transupp.c                    | 2433 -------
 dep/libjpeg/src/transupp.h                    |  230 -
 dep/libpng/ANNOUNCE                           |   63 -
 dep/libpng/AUTHORS                            |   61 -
 dep/libpng/CHANGES                            | 6202 ----------------
 dep/libpng/CMakeLists.txt                     |   39 -
 dep/libpng/LICENSE                            |  134 -
 dep/libpng/README                             |  184 -
 dep/libpng/TODO                               |   23 -
 dep/libpng/TRADEMARK                          |    8 -
 dep/libpng/include/png.h                      | 3250 ---------
 dep/libpng/include/pngconf.h                  |  623 --
 dep/libpng/include/pnglibconf.h               |  224 -
 dep/libpng/libpng.vcxproj                     |   61 -
 dep/libpng/libpng.vcxproj.filters             |   52 -
 dep/libpng/src/arm/arm_init.c                 |  139 -
 dep/libpng/src/arm/filter_neon.S              |  253 -
 dep/libpng/src/arm/filter_neon_intrinsics.c   |  402 --
 dep/libpng/src/arm/palette_neon_intrinsics.c  |  151 -
 dep/libpng/src/intel/filter_sse2_intrinsics.c |  391 -
 dep/libpng/src/intel/intel_init.c             |   52 -
 dep/libpng/src/png.c                          | 4559 ------------
 dep/libpng/src/pngdebug.h                     |  153 -
 dep/libpng/src/pngerror.c                     |  957 ---
 dep/libpng/src/pngget.c                       | 1267 ----
 dep/libpng/src/pnginfo.h                      |  267 -
 dep/libpng/src/pngmem.c                       |  284 -
 dep/libpng/src/pngpread.c                     | 1104 ---
 dep/libpng/src/pngpriv.h                      | 2221 ------
 dep/libpng/src/pngread.c                      | 4228 -----------
 dep/libpng/src/pngrio.c                       |  120 -
 dep/libpng/src/pngrtran.c                     | 5040 -------------
 dep/libpng/src/pngrutil.c                     | 4680 ------------
 dep/libpng/src/pngset.c                       | 1803 -----
 dep/libpng/src/pngstruct.h                    |  479 --
 dep/libpng/src/pngtrans.c                     |  868 ---
 dep/libpng/src/pngwio.c                       |  168 -
 dep/libpng/src/pngwrite.c                     | 2418 -------
 dep/libpng/src/pngwtran.c                     |  575 --
 dep/libpng/src/pngwutil.c                     | 2781 --------
 dep/zlib/CMakeLists.txt                       |   33 -
 dep/zlib/ChangeLog                            | 1515 ----
 dep/zlib/FAQ                                  |  368 -
 dep/zlib/README                               |  115 -
 dep/zlib/include/zconf.h                      |  534 --
 dep/zlib/include/zlib.h                       | 1912 -----
 dep/zlib/src/adler32.c                        |  186 -
 dep/zlib/src/compress.c                       |   86 -
 dep/zlib/src/crc32.c                          |  442 --
 dep/zlib/src/crc32.h                          |  441 --
 dep/zlib/src/deflate.c                        | 2163 ------
 dep/zlib/src/deflate.h                        |  349 -
 dep/zlib/src/gzclose.c                        |   25 -
 dep/zlib/src/gzguts.h                         |  220 -
 dep/zlib/src/gzlib.c                          |  637 --
 dep/zlib/src/gzread.c                         |  654 --
 dep/zlib/src/gzwrite.c                        |  665 --
 dep/zlib/src/infback.c                        |  640 --
 dep/zlib/src/inffast.c                        |  323 -
 dep/zlib/src/inffast.h                        |   11 -
 dep/zlib/src/inffixed.h                       |   94 -
 dep/zlib/src/inflate.c                        | 1561 ----
 dep/zlib/src/inflate.h                        |  125 -
 dep/zlib/src/inftrees.c                       |  304 -
 dep/zlib/src/inftrees.h                       |   62 -
 dep/zlib/src/trees.c                          | 1203 ----
 dep/zlib/src/trees.h                          |  128 -
 dep/zlib/src/uncompr.c                        |   93 -
 dep/zlib/src/zutil.c                          |  325 -
 dep/zlib/src/zutil.h                          |  271 -
 dep/zlib/zlib.vcxproj                         |   50 -
 dep/zlib/zlib.vcxproj.filters                 |   33 -
 dep/zstd/CMakeLists.txt                       |   38 -
 dep/zstd/LICENSE                              |   30 -
 dep/zstd/lib/common/bitstream.h               |  478 --
 dep/zstd/lib/common/compiler.h                |  335 -
 dep/zstd/lib/common/cpu.h                     |  213 -
 dep/zstd/lib/common/debug.c                   |   24 -
 dep/zstd/lib/common/debug.h                   |  107 -
 dep/zstd/lib/common/entropy_common.c          |  368 -
 dep/zstd/lib/common/error_private.c           |   56 -
 dep/zstd/lib/common/error_private.h           |  159 -
 dep/zstd/lib/common/fse.h                     |  717 --
 dep/zstd/lib/common/fse_decompress.c          |  403 --
 dep/zstd/lib/common/huf.h                     |  364 -
 dep/zstd/lib/common/mem.h                     |  442 --
 dep/zstd/lib/common/pool.c                    |  355 -
 dep/zstd/lib/common/pool.h                    |   84 -
 dep/zstd/lib/common/portability_macros.h      |  137 -
 dep/zstd/lib/common/threading.c               |  122 -
 dep/zstd/lib/common/threading.h               |  155 -
 dep/zstd/lib/common/xxhash.c                  |   24 -
 dep/zstd/lib/common/xxhash.h                  | 5686 ---------------
 dep/zstd/lib/common/zstd_common.c             |   83 -
 dep/zstd/lib/common/zstd_deps.h               |  111 -
 dep/zstd/lib/common/zstd_internal.h           |  493 --
 dep/zstd/lib/common/zstd_trace.h              |  163 -
 dep/zstd/lib/compress/clevels.h               |  134 -
 dep/zstd/lib/compress/fse_compress.c          |  741 --
 dep/zstd/lib/compress/hist.c                  |  181 -
 dep/zstd/lib/compress/hist.h                  |   75 -
 dep/zstd/lib/compress/huf_compress.c          | 1370 ----
 dep/zstd/lib/compress/zstd_compress.c         | 6327 -----------------
 .../lib/compress/zstd_compress_internal.h     | 1458 ----
 .../lib/compress/zstd_compress_literals.c     |  159 -
 .../lib/compress/zstd_compress_literals.h     |   31 -
 .../lib/compress/zstd_compress_sequences.c    |  442 --
 .../lib/compress/zstd_compress_sequences.h    |   54 -
 .../lib/compress/zstd_compress_superblock.c   |  573 --
 .../lib/compress/zstd_compress_superblock.h   |   32 -
 dep/zstd/lib/compress/zstd_cwksp.h            |  676 --
 dep/zstd/lib/compress/zstd_double_fast.c      |  696 --
 dep/zstd/lib/compress/zstd_double_fast.h      |   38 -
 dep/zstd/lib/compress/zstd_fast.c             |  675 --
 dep/zstd/lib/compress/zstd_fast.h             |   37 -
 dep/zstd/lib/compress/zstd_lazy.c             | 2104 ------
 dep/zstd/lib/compress/zstd_lazy.h             |  125 -
 dep/zstd/lib/compress/zstd_ldm.c              |  724 --
 dep/zstd/lib/compress/zstd_ldm.h              |  117 -
 dep/zstd/lib/compress/zstd_ldm_geartab.h      |  106 -
 dep/zstd/lib/compress/zstd_opt.c              | 1446 ----
 dep/zstd/lib/compress/zstd_opt.h              |   56 -
 dep/zstd/lib/compress/zstdmt_compress.c       | 1859 -----
 dep/zstd/lib/compress/zstdmt_compress.h       |  113 -
 dep/zstd/lib/decompress/huf_decompress.c      | 1889 -----
 .../lib/decompress/huf_decompress_amd64.S     |  585 --
 dep/zstd/lib/decompress/zstd_ddict.c          |  244 -
 dep/zstd/lib/decompress/zstd_ddict.h          |   44 -
 dep/zstd/lib/decompress/zstd_decompress.c     | 2230 ------
 .../lib/decompress/zstd_decompress_block.c    | 2072 ------
 .../lib/decompress/zstd_decompress_block.h    |   68 -
 .../lib/decompress/zstd_decompress_internal.h |  236 -
 dep/zstd/lib/zdict.h                          |  452 --
 dep/zstd/lib/zstd.h                           | 2575 -------
 dep/zstd/lib/zstd_errors.h                    |   95 -
 dep/zstd/zstd.vcxproj                         |   76 -
 dep/zstd/zstd.vcxproj.filters                 |  189 -
 198 files changed, 144411 deletions(-)
 delete mode 100644 dep/libjpeg/CMakeLists.txt
 delete mode 100644 dep/libjpeg/README
 delete mode 100644 dep/libjpeg/change.log
 delete mode 100644 dep/libjpeg/include/jconfig.h
 delete mode 100644 dep/libjpeg/include/jerror.h
 delete mode 100644 dep/libjpeg/include/jmorecfg.h
 delete mode 100644 dep/libjpeg/include/jpegint.h
 delete mode 100644 dep/libjpeg/include/jpeglib.h
 delete mode 100644 dep/libjpeg/libjpeg.vcxproj
 delete mode 100644 dep/libjpeg/libjpeg.vcxproj.filters
 delete mode 100644 dep/libjpeg/src/jaricom.c
 delete mode 100644 dep/libjpeg/src/jcapimin.c
 delete mode 100644 dep/libjpeg/src/jcapistd.c
 delete mode 100644 dep/libjpeg/src/jcarith.c
 delete mode 100644 dep/libjpeg/src/jccoefct.c
 delete mode 100644 dep/libjpeg/src/jccolor.c
 delete mode 100644 dep/libjpeg/src/jcdctmgr.c
 delete mode 100644 dep/libjpeg/src/jchuff.c
 delete mode 100644 dep/libjpeg/src/jcinit.c
 delete mode 100644 dep/libjpeg/src/jcmainct.c
 delete mode 100644 dep/libjpeg/src/jcmarker.c
 delete mode 100644 dep/libjpeg/src/jcmaster.c
 delete mode 100644 dep/libjpeg/src/jcomapi.c
 delete mode 100644 dep/libjpeg/src/jcparam.c
 delete mode 100644 dep/libjpeg/src/jcprepct.c
 delete mode 100644 dep/libjpeg/src/jcsample.c
 delete mode 100644 dep/libjpeg/src/jctrans.c
 delete mode 100644 dep/libjpeg/src/jdapimin.c
 delete mode 100644 dep/libjpeg/src/jdapistd.c
 delete mode 100644 dep/libjpeg/src/jdarith.c
 delete mode 100644 dep/libjpeg/src/jdatadst.c
 delete mode 100644 dep/libjpeg/src/jdatasrc.c
 delete mode 100644 dep/libjpeg/src/jdcoefct.c
 delete mode 100644 dep/libjpeg/src/jdcolor.c
 delete mode 100644 dep/libjpeg/src/jdct.h
 delete mode 100644 dep/libjpeg/src/jddctmgr.c
 delete mode 100644 dep/libjpeg/src/jdhuff.c
 delete mode 100644 dep/libjpeg/src/jdinput.c
 delete mode 100644 dep/libjpeg/src/jdmainct.c
 delete mode 100644 dep/libjpeg/src/jdmarker.c
 delete mode 100644 dep/libjpeg/src/jdmaster.c
 delete mode 100644 dep/libjpeg/src/jdmerge.c
 delete mode 100644 dep/libjpeg/src/jdpostct.c
 delete mode 100644 dep/libjpeg/src/jdsample.c
 delete mode 100644 dep/libjpeg/src/jdtrans.c
 delete mode 100644 dep/libjpeg/src/jerror.c
 delete mode 100644 dep/libjpeg/src/jfdctflt.c
 delete mode 100644 dep/libjpeg/src/jfdctfst.c
 delete mode 100644 dep/libjpeg/src/jfdctint.c
 delete mode 100644 dep/libjpeg/src/jidctflt.c
 delete mode 100644 dep/libjpeg/src/jidctfst.c
 delete mode 100644 dep/libjpeg/src/jidctint.c
 delete mode 100644 dep/libjpeg/src/jinclude.h
 delete mode 100644 dep/libjpeg/src/jmemmgr.c
 delete mode 100644 dep/libjpeg/src/jmemnobs.c
 delete mode 100644 dep/libjpeg/src/jmemsys.h
 delete mode 100644 dep/libjpeg/src/jquant1.c
 delete mode 100644 dep/libjpeg/src/jquant2.c
 delete mode 100644 dep/libjpeg/src/jutils.c
 delete mode 100644 dep/libjpeg/src/jversion.h
 delete mode 100644 dep/libjpeg/src/transupp.c
 delete mode 100644 dep/libjpeg/src/transupp.h
 delete mode 100644 dep/libpng/ANNOUNCE
 delete mode 100644 dep/libpng/AUTHORS
 delete mode 100644 dep/libpng/CHANGES
 delete mode 100644 dep/libpng/CMakeLists.txt
 delete mode 100644 dep/libpng/LICENSE
 delete mode 100644 dep/libpng/README
 delete mode 100644 dep/libpng/TODO
 delete mode 100644 dep/libpng/TRADEMARK
 delete mode 100644 dep/libpng/include/png.h
 delete mode 100644 dep/libpng/include/pngconf.h
 delete mode 100644 dep/libpng/include/pnglibconf.h
 delete mode 100644 dep/libpng/libpng.vcxproj
 delete mode 100644 dep/libpng/libpng.vcxproj.filters
 delete mode 100644 dep/libpng/src/arm/arm_init.c
 delete mode 100644 dep/libpng/src/arm/filter_neon.S
 delete mode 100644 dep/libpng/src/arm/filter_neon_intrinsics.c
 delete mode 100644 dep/libpng/src/arm/palette_neon_intrinsics.c
 delete mode 100644 dep/libpng/src/intel/filter_sse2_intrinsics.c
 delete mode 100644 dep/libpng/src/intel/intel_init.c
 delete mode 100644 dep/libpng/src/png.c
 delete mode 100644 dep/libpng/src/pngdebug.h
 delete mode 100644 dep/libpng/src/pngerror.c
 delete mode 100644 dep/libpng/src/pngget.c
 delete mode 100644 dep/libpng/src/pnginfo.h
 delete mode 100644 dep/libpng/src/pngmem.c
 delete mode 100644 dep/libpng/src/pngpread.c
 delete mode 100644 dep/libpng/src/pngpriv.h
 delete mode 100644 dep/libpng/src/pngread.c
 delete mode 100644 dep/libpng/src/pngrio.c
 delete mode 100644 dep/libpng/src/pngrtran.c
 delete mode 100644 dep/libpng/src/pngrutil.c
 delete mode 100644 dep/libpng/src/pngset.c
 delete mode 100644 dep/libpng/src/pngstruct.h
 delete mode 100644 dep/libpng/src/pngtrans.c
 delete mode 100644 dep/libpng/src/pngwio.c
 delete mode 100644 dep/libpng/src/pngwrite.c
 delete mode 100644 dep/libpng/src/pngwtran.c
 delete mode 100644 dep/libpng/src/pngwutil.c
 delete mode 100644 dep/zlib/CMakeLists.txt
 delete mode 100644 dep/zlib/ChangeLog
 delete mode 100644 dep/zlib/FAQ
 delete mode 100644 dep/zlib/README
 delete mode 100644 dep/zlib/include/zconf.h
 delete mode 100644 dep/zlib/include/zlib.h
 delete mode 100644 dep/zlib/src/adler32.c
 delete mode 100644 dep/zlib/src/compress.c
 delete mode 100644 dep/zlib/src/crc32.c
 delete mode 100644 dep/zlib/src/crc32.h
 delete mode 100644 dep/zlib/src/deflate.c
 delete mode 100644 dep/zlib/src/deflate.h
 delete mode 100644 dep/zlib/src/gzclose.c
 delete mode 100644 dep/zlib/src/gzguts.h
 delete mode 100644 dep/zlib/src/gzlib.c
 delete mode 100644 dep/zlib/src/gzread.c
 delete mode 100644 dep/zlib/src/gzwrite.c
 delete mode 100644 dep/zlib/src/infback.c
 delete mode 100644 dep/zlib/src/inffast.c
 delete mode 100644 dep/zlib/src/inffast.h
 delete mode 100644 dep/zlib/src/inffixed.h
 delete mode 100644 dep/zlib/src/inflate.c
 delete mode 100644 dep/zlib/src/inflate.h
 delete mode 100644 dep/zlib/src/inftrees.c
 delete mode 100644 dep/zlib/src/inftrees.h
 delete mode 100644 dep/zlib/src/trees.c
 delete mode 100644 dep/zlib/src/trees.h
 delete mode 100644 dep/zlib/src/uncompr.c
 delete mode 100644 dep/zlib/src/zutil.c
 delete mode 100644 dep/zlib/src/zutil.h
 delete mode 100644 dep/zlib/zlib.vcxproj
 delete mode 100644 dep/zlib/zlib.vcxproj.filters
 delete mode 100644 dep/zstd/CMakeLists.txt
 delete mode 100644 dep/zstd/LICENSE
 delete mode 100644 dep/zstd/lib/common/bitstream.h
 delete mode 100644 dep/zstd/lib/common/compiler.h
 delete mode 100644 dep/zstd/lib/common/cpu.h
 delete mode 100644 dep/zstd/lib/common/debug.c
 delete mode 100644 dep/zstd/lib/common/debug.h
 delete mode 100644 dep/zstd/lib/common/entropy_common.c
 delete mode 100644 dep/zstd/lib/common/error_private.c
 delete mode 100644 dep/zstd/lib/common/error_private.h
 delete mode 100644 dep/zstd/lib/common/fse.h
 delete mode 100644 dep/zstd/lib/common/fse_decompress.c
 delete mode 100644 dep/zstd/lib/common/huf.h
 delete mode 100644 dep/zstd/lib/common/mem.h
 delete mode 100644 dep/zstd/lib/common/pool.c
 delete mode 100644 dep/zstd/lib/common/pool.h
 delete mode 100644 dep/zstd/lib/common/portability_macros.h
 delete mode 100644 dep/zstd/lib/common/threading.c
 delete mode 100644 dep/zstd/lib/common/threading.h
 delete mode 100644 dep/zstd/lib/common/xxhash.c
 delete mode 100644 dep/zstd/lib/common/xxhash.h
 delete mode 100644 dep/zstd/lib/common/zstd_common.c
 delete mode 100644 dep/zstd/lib/common/zstd_deps.h
 delete mode 100644 dep/zstd/lib/common/zstd_internal.h
 delete mode 100644 dep/zstd/lib/common/zstd_trace.h
 delete mode 100644 dep/zstd/lib/compress/clevels.h
 delete mode 100644 dep/zstd/lib/compress/fse_compress.c
 delete mode 100644 dep/zstd/lib/compress/hist.c
 delete mode 100644 dep/zstd/lib/compress/hist.h
 delete mode 100644 dep/zstd/lib/compress/huf_compress.c
 delete mode 100644 dep/zstd/lib/compress/zstd_compress.c
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_internal.h
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_literals.c
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_literals.h
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_sequences.c
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_sequences.h
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_superblock.c
 delete mode 100644 dep/zstd/lib/compress/zstd_compress_superblock.h
 delete mode 100644 dep/zstd/lib/compress/zstd_cwksp.h
 delete mode 100644 dep/zstd/lib/compress/zstd_double_fast.c
 delete mode 100644 dep/zstd/lib/compress/zstd_double_fast.h
 delete mode 100644 dep/zstd/lib/compress/zstd_fast.c
 delete mode 100644 dep/zstd/lib/compress/zstd_fast.h
 delete mode 100644 dep/zstd/lib/compress/zstd_lazy.c
 delete mode 100644 dep/zstd/lib/compress/zstd_lazy.h
 delete mode 100644 dep/zstd/lib/compress/zstd_ldm.c
 delete mode 100644 dep/zstd/lib/compress/zstd_ldm.h
 delete mode 100644 dep/zstd/lib/compress/zstd_ldm_geartab.h
 delete mode 100644 dep/zstd/lib/compress/zstd_opt.c
 delete mode 100644 dep/zstd/lib/compress/zstd_opt.h
 delete mode 100644 dep/zstd/lib/compress/zstdmt_compress.c
 delete mode 100644 dep/zstd/lib/compress/zstdmt_compress.h
 delete mode 100644 dep/zstd/lib/decompress/huf_decompress.c
 delete mode 100644 dep/zstd/lib/decompress/huf_decompress_amd64.S
 delete mode 100644 dep/zstd/lib/decompress/zstd_ddict.c
 delete mode 100644 dep/zstd/lib/decompress/zstd_ddict.h
 delete mode 100644 dep/zstd/lib/decompress/zstd_decompress.c
 delete mode 100644 dep/zstd/lib/decompress/zstd_decompress_block.c
 delete mode 100644 dep/zstd/lib/decompress/zstd_decompress_block.h
 delete mode 100644 dep/zstd/lib/decompress/zstd_decompress_internal.h
 delete mode 100644 dep/zstd/lib/zdict.h
 delete mode 100644 dep/zstd/lib/zstd.h
 delete mode 100644 dep/zstd/lib/zstd_errors.h
 delete mode 100644 dep/zstd/zstd.vcxproj
 delete mode 100644 dep/zstd/zstd.vcxproj.filters

diff --git a/dep/CMakeLists.txt b/dep/CMakeLists.txt
index 438c8a10c..183c08d1a 100644
--- a/dep/CMakeLists.txt
+++ b/dep/CMakeLists.txt
@@ -30,14 +30,6 @@ disable_compiler_warnings_for_target(rcheevos)
 add_subdirectory(rapidyaml EXCLUDE_FROM_ALL)
 disable_compiler_warnings_for_target(rapidyaml)
 
-# Build dependencies on Windows/Android.
-if(WIN32 OR ANDROID)
-  add_subdirectory(zlib EXCLUDE_FROM_ALL)
-  add_subdirectory(zstd EXCLUDE_FROM_ALL)
-  add_subdirectory(libpng EXCLUDE_FROM_ALL)
-  add_subdirectory(libjpeg EXCLUDE_FROM_ALL)
-endif()
-
 if(ENABLE_CUBEB)
   add_subdirectory(cubeb EXCLUDE_FROM_ALL)
   disable_compiler_warnings_for_target(cubeb)
diff --git a/dep/libjpeg/CMakeLists.txt b/dep/libjpeg/CMakeLists.txt
deleted file mode 100644
index 3af958df4..000000000
--- a/dep/libjpeg/CMakeLists.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-add_library(jpeg
-  include/jconfig.h
-  include/jerror.h
-  include/jmorecfg.h
-  include/jpegint.h
-  include/jpeglib.h
-  src/jaricom.c
-  src/jcapimin.c
-  src/jcapistd.c
-  src/jcarith.c
-  src/jccoefct.c
-  src/jccolor.c
-  src/jcdctmgr.c
-  src/jchuff.c
-  src/jcinit.c
-  src/jcmainct.c
-  src/jcmarker.c
-  src/jcmaster.c
-  src/jcomapi.c
-  src/jcparam.c
-  src/jcprepct.c
-  src/jcsample.c
-  src/jctrans.c
-  src/jdapimin.c
-  src/jdapistd.c
-  src/jdarith.c
-  src/jdatadst.c
-  src/jdatasrc.c
-  src/jdcoefct.c
-  src/jdcolor.c
-  src/jdct.h
-  src/jddctmgr.c
-  src/jdhuff.c
-  src/jdinput.c
-  src/jdmainct.c
-  src/jdmarker.c
-  src/jdmaster.c
-  src/jdmerge.c
-  src/jdpostct.c
-  src/jdsample.c
-  src/jdtrans.c
-  src/jerror.c
-  src/jfdctflt.c
-  src/jfdctfst.c
-  src/jfdctint.c
-  src/jidctflt.c
-  src/jidctfst.c
-  src/jidctint.c
-  src/jinclude.h
-  src/jmemmgr.c
-  src/jmemnobs.c
-  src/jmemsys.h
-  src/jquant1.c
-  src/jquant2.c
-  src/jutils.c
-  src/jversion.h
-  src/transupp.c
-  src/transupp.h
-)
-
-target_include_directories(jpeg PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_include_directories(jpeg PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
-disable_compiler_warnings_for_target(jpeg)
-
-add_library(JPEG::JPEG ALIAS jpeg)
diff --git a/dep/libjpeg/README b/dep/libjpeg/README
deleted file mode 100644
index 0f4ee1522..000000000
--- a/dep/libjpeg/README
+++ /dev/null
@@ -1,374 +0,0 @@
-The Independent JPEG Group's JPEG software
-==========================================
-
-README for release 9f of 14-Jan-2024
-====================================
-
-This distribution contains the ninth public release of the Independent JPEG
-Group's free JPEG software.  You are welcome to redistribute this software and
-to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
-
-This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
-Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
-John Korejwa, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
-Ge' Weijers, and other members of the Independent JPEG Group.
-
-IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
-(previously known as JPEG, together with ITU-T SG16).
-
-
-DOCUMENTATION ROADMAP
-=====================
-
-This file contains the following sections:
-
-OVERVIEW            General description of JPEG and the IJG software.
-LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
-REFERENCES          Where to learn more about JPEG.
-ARCHIVE LOCATIONS   Where to find newer versions of this software.
-ACKNOWLEDGMENTS     Special thanks.
-FILE FORMAT WARS    Software *not* to get.
-TO DO               Plans for future IJG releases.
-
-Other documentation files in the distribution are:
-
-User documentation:
-  install.txt       How to configure and install the IJG software.
-  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
-                    rdjpgcom, and wrjpgcom.
-  *.1               Unix-style man pages for programs (same info as usage.txt).
-  wizard.txt        Advanced usage instructions for JPEG wizards only.
-  cdaltui.txt       Description of alternate user interface for cjpeg/djpeg.
-  change.log        Version-to-version change highlights.
-Programmer and internal documentation:
-  libjpeg.txt       How to use the JPEG library in your own programs.
-  example.c         Sample code for calling the JPEG library.
-  structure.txt     Overview of the JPEG library's internal structure.
-  filelist.txt      Road map of IJG files.
-  coderules.txt     Coding style rules --- please read if you contribute code.
-
-Please read at least the files install.txt and usage.txt.  Some information
-can also be found in the JPEG FAQ (Frequently Asked Questions) article.  See
-ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
-
-If you want to understand how the JPEG code works, we suggest reading one or
-more of the REFERENCES, then looking at the documentation files (in roughly
-the order listed) before diving into the code.
-
-
-OVERVIEW
-========
-
-This package contains C software to implement JPEG image encoding, decoding,
-and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
-method for full-color and grayscale images.
-
-This software implements JPEG baseline, extended-sequential, and progressive
-compression processes.  Provision is made for supporting all variants of these
-processes, although some uncommon parameter settings aren't implemented yet.
-We have made no provision for supporting the hierarchical or lossless
-processes defined in the standard.
-
-We provide a set of library routines for reading and writing JPEG image files,
-plus two sample applications "cjpeg" and "djpeg", which use the library to
-perform conversion between JPEG and some other popular image file formats.
-The library is intended to be reused in other applications.
-
-In order to support file conversion and viewing software, we have included
-considerable functionality beyond the bare JPEG coding/decoding capability;
-for example, the color quantization modules are not strictly part of JPEG
-decoding, but they are essential for output to colormapped file formats or
-colormapped displays.  These extra functions can be compiled out of the
-library if not required for a particular application.
-
-We have also included "jpegtran", a utility for lossless transcoding between
-different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
-applications for inserting and extracting textual comments in JFIF files.
-
-The emphasis in designing this software has been on achieving portability and
-flexibility, while also making it fast enough to be useful.  In particular,
-the software is not intended to be read as a tutorial on JPEG.  (See the
-REFERENCES section for introductory material.)  Rather, it is intended to
-be reliable, portable, industrial-strength code.  We do not claim to have
-achieved that goal in every aspect of the software, but we strive for it.
-
-We welcome the use of this software as a component of commercial products.
-No royalty is required, but we do ask for an acknowledgement in product
-documentation, as described under LEGAL ISSUES.
-
-
-LEGAL ISSUES
-============
-
-In plain English:
-
-1. We don't promise that this software works.  (But if you find any bugs,
-   please let us know!)
-2. You can use this software for whatever you want.  You don't have to pay us.
-3. You may not pretend that you wrote this software.  If you use it in a
-   program, you must acknowledge somewhere in your documentation that
-   you've used the IJG code.
-
-In legalese:
-
-The authors make NO WARRANTY or representation, either express or implied,
-with respect to this software, its quality, accuracy, merchantability, or
-fitness for a particular purpose.  This software is provided "AS IS", and you,
-its user, assume the entire risk as to its quality and accuracy.
-
-This software is copyright (C) 1991-2024, Thomas G. Lane, Guido Vollbeding.
-All Rights Reserved except as specified below.
-
-Permission is hereby granted to use, copy, modify, and distribute this
-software (or portions thereof) for any purpose, without fee, subject to these
-conditions:
-(1) If any part of the source code for this software is distributed, then this
-README file must be included, with this copyright and no-warranty notice
-unaltered; and any additions, deletions, or changes to the original files
-must be clearly indicated in accompanying documentation.
-(2) If only executable code is distributed, then the accompanying
-documentation must state that "this software is based in part on the work of
-the Independent JPEG Group".
-(3) Permission for use of this software is granted only if the user accepts
-full responsibility for any undesirable consequences; the authors accept
-NO LIABILITY for damages of any kind.
-
-These conditions apply to any software derived from or based on the IJG code,
-not just to the unmodified library.  If you use our work, you ought to
-acknowledge us.
-
-Permission is NOT granted for the use of any IJG author's name or company name
-in advertising or publicity relating to this software or products derived from
-it.  This software may be referred to only as "the Independent JPEG Group's
-software".
-
-We specifically permit and encourage the use of this software as the basis of
-commercial products, provided that all warranty or liability claims are
-assumed by the product vendor.
-
-
-The Unix configuration script "configure" was produced with GNU Autoconf.
-It is copyright by the Free Software Foundation but is freely distributable.
-The same holds for its supporting scripts (config.guess, config.sub,
-ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
-but is also freely distributable.
-
-
-REFERENCES
-==========
-
-We recommend reading one or more of these references before trying to
-understand the innards of the JPEG software.
-
-The best short technical introduction to the JPEG compression algorithm is
-	Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
-	Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
-(Adjacent articles in that issue discuss MPEG motion picture compression,
-applications of JPEG, and related topics.)  If you don't have the CACM issue
-handy, a PDF file containing a revised version of Wallace's article is
-available at https://www.ijg.org/files/Wallace.JPEG.pdf.  The file (actually
-a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
-omits the sample images that appeared in CACM, but it includes corrections
-and some added material.  Note: the Wallace article is copyright ACM and IEEE,
-and it may not be used for commercial purposes.
-
-A somewhat less technical, more leisurely introduction to JPEG can be found in
-"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by
-M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1.  This book provides
-good explanations and example C code for a multitude of compression methods
-including JPEG.  It is an excellent source if you are comfortable reading C
-code but don't know much about data compression in general.  The book's JPEG
-sample code is far from industrial-strength, but when you are ready to look
-at a full implementation, you've got one here...
-
-The best currently available description of JPEG is the textbook "JPEG Still
-Image Data Compression Standard" by William B. Pennebaker and Joan L.
-Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
-Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
-standards (DIS 10918-1 and draft DIS 10918-2).
-Although this is by far the most detailed and comprehensive exposition of
-JPEG publicly available, we point out that it is still missing an explanation
-of the most essential properties and algorithms of the underlying DCT
-technology.
-If you think that you know about DCT-based JPEG after reading this book,
-then you are in delusion.  The real fundamentals and corresponding potential
-of DCT-based JPEG are not publicly known so far, and that is the reason for
-all the mistaken developments taking place in the image coding domain.
-
-The original JPEG standard is divided into two parts, Part 1 being the actual
-specification, while Part 2 covers compliance testing methods.  Part 1 is
-titled "Digital Compression and Coding of Continuous-tone Still Images,
-Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
-10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
-Continuous-tone Still Images, Part 2: Compliance testing" and has document
-numbers ISO/IEC IS 10918-2, ITU-T T.83.
-IJG JPEG 8 introduced an implementation of the JPEG SmartScale extension
-which is specified in two documents:  A contributed document at ITU and ISO
-with title "ITU-T JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced
-Image Coding", April 2006, Geneva, Switzerland.  The latest version of this
-document is Revision 3.  And a contributed document ISO/IEC JTC1/SC29/WG1 N
-5799 with title "Evolution of JPEG", June/July 2011, Berlin, Germany.
-IJG JPEG 9 introduces a reversible color transform for improved lossless
-compression which is described in a contributed document ISO/IEC JTC1/SC29/
-WG1 N 6080 with title "JPEG 9 Lossless Coding", June/July 2012, Paris, France.
-
-The JPEG standard does not specify all details of an interchangeable file
-format.  For the omitted details we follow the "JFIF" conventions, version 2.
-JFIF version 1 has been adopted as Recommendation ITU-T T.871 (05/2011) :
-Information technology - Digital compression and coding of continuous-tone
-still images: JPEG File Interchange Format (JFIF).  It is available as a
-free download in PDF file format from https://www.itu.int/rec/T-REC-T.871.
-A PDF file of the older JFIF document is available at
-https://www.w3.org/Graphics/JPEG/jfif3.pdf.
-
-The TIFF 6.0 file format specification can be obtained by FTP from
-ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
-found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
-IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
-Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
-(Compression tag 7).  Copies of this Note can be obtained from
-https://www.ijg.org/files/.  It is expected that the next revision
-of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
-Although IJG's own code does not support TIFF/JPEG, the free libtiff library
-uses our library to implement TIFF/JPEG per the Note.
-
-
-ARCHIVE LOCATIONS
-=================
-
-The "official" archive site for this software is www.ijg.org.
-The most recent released version can always be found there in
-directory "files".  This particular version will be archived
-in Windows-compatible "zip" archive format as
-https://www.ijg.org/files/jpegsr9f.zip, and
-in Unix-compatible "tar.gz" archive format as
-https://www.ijg.org/files/jpegsrc.v9f.tar.gz.
-
-The JPEG FAQ (Frequently Asked Questions) article is a source of some
-general information about JPEG.
-It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
-and other news.answers archive sites, including the official news.answers
-archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
-If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
-with body
-	send usenet/news.answers/jpeg-faq/part1
-	send usenet/news.answers/jpeg-faq/part2
-
-
-ACKNOWLEDGMENTS
-===============
-
-Thank to Juergen Bruder for providing me with a copy of the common DCT
-algorithm article, only to find out that I had come to the same result
-in a more direct and comprehensible way with a more generative approach.
-
-Thank to Istvan Sebestyen and Joan L. Mitchell for inviting me to the
-ITU JPEG (Study Group 16) meeting in Geneva, Switzerland.
-
-Thank to Thomas Wiegand and Gary Sullivan for inviting me to the
-Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland.
-
-Thank to Thomas Richter and Daniel Lee for inviting me to the
-ISO/IEC JTC1/SC29/WG1 (previously known as JPEG, together with ITU-T SG16)
-meeting in Berlin, Germany.
-
-Thank to John Korejwa and Massimo Ballerini for inviting me to
-fruitful consultations in Boston, MA and Milan, Italy.
-
-Thank to Hendrik Elstner, Roland Fassauer, Simone Zuck, Guenther
-Maier-Gerber, Walter Stoeber, Fred Schmitz, and Norbert Braunagel
-for corresponding business development.
-
-Thank to Nico Zschach and Dirk Stelling of the technical support team
-at the Digital Images company in Halle for providing me with extra
-equipment for configuration tests.
-
-Thank to Richard F. Lyon (then of Foveon Inc.) for fruitful
-communication about JPEG configuration in Sigma Photo Pro software.
-
-Thank to Andrew Finkenstadt for hosting the ijg.org site.
-
-Thank to Thomas G. Lane for the original design and development
-of this singular software package.
-
-Thank to Lars Goehler, Andreas Heinecke, Sebastian Fuss,
-Yvonne Roebert, Andrej Werner, Ulf-Dietrich Braumann,
-and Nina Ssymank for support and public relations.
-
-
-FILE FORMAT WARS
-================
-
-The ISO/IEC JTC1/SC29/WG1 standards committee (previously known as JPEG,
-together with ITU-T SG16) currently promotes different formats containing
-the name "JPEG" which is misleading because these formats are incompatible
-with original DCT-based JPEG and are based on faulty technologies.
-IJG therefore does not and will not support such momentary mistakes
-(see REFERENCES).
-There exist also distributions under the name "OpenJPEG" promoting such
-kind of formats which is misleading because they don't support original
-JPEG images.
-We have no sympathy for the promotion of inferior formats.  Indeed, one of
-the original reasons for developing this free software was to help force
-convergence on common, interoperable format standards for JPEG files.
-Don't use an incompatible file format!
-(In any case, our decoder will remain capable of reading existing JPEG
-image files indefinitely.)
-
-The ISO committee pretends to be "responsible for the popular JPEG" in their
-public reports which is not true because they don't respond to actual
-requirements for the maintenance of the original JPEG specification.
-Furthermore, the ISO committee pretends to "ensure interoperability" with
-their standards which is not true because their "standards" support only
-application-specific and proprietary use cases and contain mathematically
-incorrect code.
-
-There are currently different distributions in circulation containing the
-name "libjpeg" which is misleading because they don't have the features and
-are incompatible with formats supported by actual IJG libjpeg distributions.
-One of those fakes is released by members of the ISO committee and just uses
-the name of libjpeg for misdirection of people, similar to the abuse of the
-name JPEG as described above, while having nothing in common with actual IJG
-libjpeg distributions and containing mathematically incorrect code.
-The other one claims to be a "derivative" or "fork" of the original libjpeg,
-but violates the license conditions as described under LEGAL ISSUES above
-and violates basic C programming properties.
-We have no sympathy for the release of misleading, incorrect and illegal
-distributions derived from obsolete code bases.
-Don't use an obsolete code base!
-
-According to the UCC (Uniform Commercial Code) law, IJG has the lawful and
-legal right to foreclose on certain standardization bodies and other
-institutions or corporations that knowingly perform substantial and
-systematic deceptive acts and practices, fraud, theft, and damaging of the
-value of the people of this planet without their knowing, willing and
-intentional consent.
-The titles, ownership, and rights of these institutions and all their assets
-are now duly secured and held in trust for the free people of this planet.
-People of the planet, on every country, may have a financial interest in
-the assets of these former principals, agents, and beneficiaries of the
-foreclosed institutions and corporations.
-IJG asserts what is: that each man, woman, and child has unalienable value
-and rights granted and deposited in them by the Creator and not any one of
-the people is subordinate to any artificial principality, corporate fiction
-or the special interest of another without their appropriate knowing,
-willing and intentional consent made by contract or accommodation agreement.
-IJG expresses that which already was.
-The people have already determined and demanded that public administration
-entities, national governments, and their supporting judicial systems must
-be fully transparent, accountable, and liable.
-IJG has secured the value for all concerned free people of the planet.
-
-A partial list of foreclosed institutions and corporations ("Hall of Shame")
-is currently prepared and will be published later.
-
-
-TO DO
-=====
-
-Version 9 is the second release of a new generation JPEG standard
-to overcome the limitations of the original JPEG specification,
-and is the first true source reference JPEG codec.
-More features are being prepared for coming releases...
-
-Please send bug reports, offers of help, etc. to jpeg-info@ijg.org.
diff --git a/dep/libjpeg/change.log b/dep/libjpeg/change.log
deleted file mode 100644
index eae011312..000000000
--- a/dep/libjpeg/change.log
+++ /dev/null
@@ -1,527 +0,0 @@
-CHANGE LOG for Independent JPEG Group's JPEG software
-
-
-Version 9f  14-Jan-2024
------------------------
-
-Add build system for C++Builder/RAD Studio.
-
-Add build system for Xcode (beside configure).
-
-Add ARM64EC (Emulation Compatible) platform support in the
-Visual Studio build.
-
-
-Version 9e  16-Jan-2022
------------------------
-
-Include alternate user interface files for cjpeg/djpeg.
-
-jcparam.c: change default chrominance DC quantization factor
-for lossless support.  Note: Requires rebuild of test images.
-
-rdgif.c, cderror.h: add sanity check for GIF image dimensions.
-Thank to Casper Sun for cjpeg potential vulnerability report.
-
-Add ARM and ARM64 platform support in the Visual Studio build.
-
-
-Version 9d  12-Jan-2020
------------------------
-
-Optimize the optimal Huffman code table generation to produce
-slightly smaller files.  Thank to John Korejwa for suggestion.
-Note: Requires rebuild of testimgp.jpg.
-
-Decoding Huffman: Use default tables if tables are not defined.
-Thank to Simone Azzalin for report (Motion JPEG),
-and to Martin Strunz for hint.
-
-Add sanity check in optimal Huffman code table generation.
-Thank to Adam Farley for suggestion.
-
-rdtarga.c: use read_byte(), with EOF check, instead of getc()
-in read_*_pixel().
-Thank to Chijin Zhou for cjpeg potential vulnerability report.
-
-jmemnobs.c: respect the max_memory_to_use setting in
-jpeg_mem_available() computation.  Thank to Sheng Shu and
-Dongdong She for djpeg potential vulnerability report.
-
-jdarith.c, jdhuff.c: avoid left shift of negative value
-compiler warning in decode_mcu_AC_refine().
-Thank to Indu Bhagat for suggestion.
-
-Add x64 (64-bit) platform support, avoid compiler warnings.
-Thank to Jonathan Potter, Feiyun Wang, and Sheng Shu for suggestion.
-
-Adjust libjpeg version specification for pkg-config file.
-Thank to Chen Chen for suggestion.
-
-Restore GIF read and write support from libjpeg version 6a.
-Thank to Wolfgang Werner (W.W.) Heinz for suggestion.
-
-Improve consistency in raw (downsampled) image data processing mode.
-Thank to Zhongyuan Zhou for hint.
-
-Avoid out of bounds array read (AC derived table pointers)
-in start pass in jdhuff.c.  Thank to Peng Li for report.
-
-Improve code sanity (jdhuff.c).
-Thank to Reza Mirzazade farkhani for reports.
-
-Add jpegtran -drop option; add options to the crop extension and wipe
-to fill the extra area with content from the source image region,
-instead of gray out.
-
-
-Version 9c  14-Jan-2018
------------------------
-
-jpegtran: add an option to the -wipe switch to fill the region
-with the average of adjacent blocks, instead of gray out.
-Thank to Caitlyn Feddock and Maddie Ziegler for inspiration.
-
-Make range extension bits adjustable (in jpegint.h).
-Thank to Robin Watts for suggestion.
-
-Provide macros for fflush() and ferror() in jinclude.h in order
-to facilitate adaption by applications using an own FILE class.
-Thank to Gerhard Huber for suggestion.
-
-Add libjpeg pkg-config file.  Thank to Mark Lavi, Vincent Torri,
-Patrick McMunn, and Huw Davies for suggestion.
-
-Add sanity checks in cjpeg image reader modules.
-Thank to Bingchang, Liu for reports.
-
-
-Version 9b  17-Jan-2016
------------------------
-
-Improvements and optimizations in DCT and color calculations.
-Normalize range limit array composition and access pattern.
-Thank to Sia Furler and Maddie Ziegler for inspiration.
-
-Use merged upsample with scaled DCT sizes larger than 8.
-Thank to Taylor Hatala for inspiration.
-
-Check for excessive comment lengths in argument parsing in wrjpgcom.c.
-Thank to Julian Cohen for hint.
-
-Add makefile.b32 for use with Borland C++ 32-bit (bcc32).
-Thank to Joe Slater for contribution.
-
-Document 'f' specifier for jpegtran -crop specification.
-Thank to Michele Martone for suggestion.
-
-Use defined value from header instead of hardwired number in rdswitch.c.
-Thank to Robert Sprowson for hint.
-
-
-Version 9a  19-Jan-2014
------------------------
-
-Add support for wide gamut color spaces (JFIF version 2).
-Improve clarity and accuracy in color conversion modules.
-Note: Requires rebuild of test images.
-
-Extend the bit depth support to all values from 8 to 12
-(BITS_IN_JSAMPLE configuration option in jmorecfg.h).
-jpegtran now supports N bits sample data precision with all N from 8 to 12
-in a single instance.  Thank to Roland Fassauer for inspiration.
-
-Try to resolve issues with new boolean type definition.
-Thank also to v4hn for suggestion.
-
-Enable option to use default Huffman tables for lossless compression
-(for hardware solution), and in this case improve lossless RGB compression
-with reversible color transform.  Thank to Benny Alexandar for hint.
-
-Extend the entropy decoding structure, so that extraneous bytes between
-compressed scan data and following marker can be reported correctly.
-Thank to Nigel Tao for hint.
-
-Add jpegtran -wipe option and extension for -crop.
-Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
-
-
-Version 9  13-Jan-2013
-----------------------
-
-Add cjpeg -rgb1 option to create an RGB JPEG file, and insert
-a simple reversible color transform into the processing which
-significantly improves the compression.
-The recommended command for lossless coding of RGB images is now
-cjpeg -rgb1 -block 1 -arithmetic.
-As said, this option improves the compression significantly, but
-the files are not compatible with JPEG decoders prior to IJG v9
-due to the included color transform.
-The used color transform and marker signaling is compatible with
-other JPEG standards (e.g., JPEG-LS part 2).
-
-Remove the automatic de-ANSI-fication support (Automake 1.12).
-Thank also to Nitin A Kamble for suggestion.
-
-Add remark for jpeg_mem_dest() in jdatadst.c.
-Thank to Elie-Gregoire Khoury for the hint.
-
-Support files with invalid component identifiers (created
-by Adobe PDF).  Thank to Robin Watts for the suggestion.
-
-Adapt full buffer case in jcmainct.c for use with scaled DCT.
-Thank to Sergii Biloshytskyi for the suggestion.
-
-Add type identifier for declaration of noreturn functions.
-Thank to Brett L. Moore for the suggestion.
-
-Correct argument type in format string, avoid compiler warnings.
-Thank to Vincent Torri for hint.
-
-Add missing #include directives in configuration checks, avoid
-configuration errors.  Thank to John Spencer for the hint.
-
-
-Version 8d  15-Jan-2012
------------------------
-
-Add cjpeg -rgb option to create RGB JPEG files.
-Using this switch suppresses the conversion from RGB
-colorspace input to the default YCbCr JPEG colorspace.
-This feature allows true lossless JPEG coding of RGB color images.
-The recommended command for this purpose is currently
-cjpeg -rgb -block 1 -arithmetic.
-SmartScale capable decoder (introduced with IJG JPEG 8) required.
-Thank to Michael Koch for the initial suggestion.
-
-Add option to disable the region adjustment in the transupp crop code.
-Thank to Jeffrey Friedl for the suggestion.
-
-Thank to Richard Jones and Edd Dawson for various minor corrections.
-
-Thank to Akim Demaille for configure.ac cleanup.
-
-
-Version 8c  16-Jan-2011
------------------------
-
-Add option to compression library and cjpeg (-block N) to use
-different DCT block size.
-All N from 1 to 16 are possible.  Default is 8 (baseline format).
-Larger values produce higher compression,
-smaller values produce higher quality.
-SmartScale capable decoder (introduced with IJG JPEG 8) required.
-
-
-Version 8b  16-May-2010
------------------------
-
-Repair problem in new memory source manager with corrupt JPEG data.
-Thank to Ted Campbell and Samuel Chun for the report.
-
-Repair problem in Makefile.am test target.
-Thank to anonymous user for the report.
-
-Support MinGW installation with automatic configure.
-Thank to Volker Grabsch for the suggestion.
-
-
-Version 8a  28-Feb-2010
------------------------
-
-Writing tables-only datastreams via jpeg_write_tables works again.
-
-Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg.
-Thank to Brett Blackham for the suggestion.
-
-Improve accuracy in floating point IDCT calculation.
-Thank to Robert Hooke for the hint.
-
-
-Version 8  10-Jan-2010
-----------------------
-
-jpegtran now supports the same -scale option as djpeg for "lossless" resize.
-An implementation of the JPEG SmartScale extension is required for this
-feature.  A (draft) specification of the JPEG SmartScale extension is
-available as a contributed document at ITU and ISO.  Revision 2 or later
-of the document is required (latest document version is Revision 3).
-The SmartScale extension will enable more features beside lossless resize
-in future implementations, as described in the document (new compression
-options).
-
-Add sanity check in BMP reader module to avoid cjpeg crash for empty input
-image (thank to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error).
-
-Add data source and destination managers for read from and write to
-memory buffers.  New API functions jpeg_mem_src and jpeg_mem_dest.
-Thank to Roberto Boni from Italy for the suggestion.
-
-
-Version 7  27-Jun-2009
-----------------------
-
-New scaled DCTs implemented.
-djpeg now supports scalings N/8 with all N from 1 to 16.
-cjpeg now supports scalings 8/N with all N from 1 to 16.
-Scaled DCTs with size larger than 8 are now also used for resolving the
-common 2x2 chroma subsampling case without additional spatial resampling.
-Separate spatial resampling for those kind of files is now only necessary
-for N>8 scaling cases.
-Furthermore, separate scaled DCT functions are provided for direct resolving
-of the common asymmetric subsampling cases (2x1 and 1x2) without additional
-spatial resampling.
-
-cjpeg -quality option has been extended for support of separate quality
-settings for luminance and chrominance (or in general, for every provided
-quantization table slot).
-New API function jpeg_default_qtables() and q_scale_factor array in library.
-
-Added -nosmooth option to cjpeg, complementary to djpeg.
-New variable "do_fancy_downsampling" in library, complement to fancy
-upsampling.  Fancy upsampling now uses direct DCT scaling with sizes
-larger than 8.  The old method is not reversible and has been removed.
-
-Support arithmetic entropy encoding and decoding.
-Added files jaricom.c, jcarith.c, jdarith.c.
-
-Straighten the file structure:
-Removed files jidctred.c, jcphuff.c, jchuff.h, jdphuff.c, jdhuff.h.
-
-jpegtran has a new "lossless" cropping feature.
-
-Implement -perfect option in jpegtran, new API function
-jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch)
-
-Better error messages for jpegtran fopen failure.
-(DP 203_jpegtran_errmsg.dpatch)
-
-Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c:
-according to Netpbm, the de facto standard implementation of the PNM formats,
-the most significant byte is first. (DP 203_rdppm.dpatch)
-
-Add -raw option to rdjpgcom not to mangle the output.
-(DP 205_rdjpgcom_raw.dpatch)
-
-Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch)
-
-Add extern "C" to jpeglib.h.
-This avoids the need to put extern "C" { ... } around #include "jpeglib.h"
-in your C++ application.  Defining the symbol DONT_USE_EXTERN_C in the
-configuration prevents this. (DP 202_jpeglib.h_c++.dpatch)
-
-
-Version 6b  27-Mar-1998
------------------------
-
-jpegtran has new features for lossless image transformations (rotation
-and flipping) as well as "lossless" reduction to grayscale.
-
-jpegtran now copies comments by default; it has a -copy switch to enable
-copying all APPn blocks as well, or to suppress comments.  (Formerly it
-always suppressed comments and APPn blocks.)  jpegtran now also preserves
-JFIF version and resolution information.
-
-New decompressor library feature: COM and APPn markers found in the input
-file can be saved in memory for later use by the application.  (Before,
-you had to code this up yourself with a custom marker processor.)
-
-There is an unused field "void * client_data" now in compress and decompress
-parameter structs; this may be useful in some applications.
-
-JFIF version number information is now saved by the decoder and accepted by
-the encoder.  jpegtran uses this to copy the source file's version number,
-to ensure "jpegtran -copy all" won't create bogus files that contain JFXX
-extensions but claim to be version 1.01.  Applications that generate their
-own JFXX extension markers also (finally) have a supported way to cause the
-encoder to emit JFIF version number 1.02.
-
-djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather
-than as unknown APP0 markers.
-
-In -verbose mode, djpeg and rdjpgcom will try to print the contents of
-APP12 markers as text.  Some digital cameras store useful text information
-in APP12 markers.
-
-Handling of truncated data streams is more robust: blocks beyond the one in
-which the error occurs will be output as uniform gray, or left unchanged
-if decoding a progressive JPEG.  The appearance no longer depends on the
-Huffman tables being used.
-
-Huffman tables are checked for validity much more carefully than before.
-
-To avoid the Unisys LZW patent, djpeg's GIF output capability has been
-changed to produce "uncompressed GIFs", and cjpeg's GIF input capability
-has been removed altogether.  We're not happy about it either, but there
-seems to be no good alternative.
-
-The configure script now supports building libjpeg as a shared library
-on many flavors of Unix (all the ones that GNU libtool knows how to
-build shared libraries for).  Use "./configure --enable-shared" to
-try this out.
-
-New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio.
-Also, a jconfig file and a build script for Metrowerks CodeWarrior
-on Apple Macintosh.  makefile.dj has been updated for DJGPP v2, and there
-are miscellaneous other minor improvements in the makefiles.
-
-jmemmac.c now knows how to create temporary files following Mac System 7
-conventions.
-
-djpeg's -map switch is now able to read raw-format PPM files reliably.
-
-cjpeg -progressive -restart no longer generates any unnecessary DRI markers.
-
-Multiple calls to jpeg_simple_progression for a single JPEG object
-no longer leak memory.
-
-
-Version 6a  7-Feb-96
---------------------
-
-Library initialization sequence modified to detect version mismatches
-and struct field packing mismatches between library and calling application.
-This change requires applications to be recompiled, but does not require
-any application source code change.
-
-All routine declarations changed to the style "GLOBAL(type) name ...",
-that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the
-routine's return type as an argument.  This makes it possible to add
-Microsoft-style linkage keywords to all the routines by changing just
-these macros.  Note that any application code that was using these macros
-will have to be changed.
-
-DCT coefficient quantization tables are now stored in normal array order
-rather than zigzag order.  Application code that calls jpeg_add_quant_table,
-or otherwise manipulates quantization tables directly, will need to be
-changed.  If you need to make such code work with either older or newer
-versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is
-recommended.
-
-djpeg's trace capability now dumps DQT tables in natural order, not zigzag
-order.  This allows the trace output to be made into a "-qtables" file
-more easily.
-
-New system-dependent memory manager module for use on Apple Macintosh.
-
-Fix bug in cjpeg's -smooth option: last one or two scanlines would be
-duplicates of the prior line unless the image height mod 16 was 1 or 2.
-
-Repair minor problems in VMS, BCC, MC6 makefiles.
-
-New configure script based on latest GNU Autoconf.
-
-Correct the list of include files needed by MetroWerks C for ccommand().
-
-Numerous small documentation updates.
-
-
-Version 6  2-Aug-95
--------------------
-
-Progressive JPEG support: library can read and write full progressive JPEG
-files.  A "buffered image" mode supports incremental decoding for on-the-fly
-display of progressive images.  Simply recompiling an existing IJG-v5-based
-decoder with v6 should allow it to read progressive files, though of course
-without any special progressive display.
-
-New "jpegtran" application performs lossless transcoding between different
-JPEG formats; primarily, it can be used to convert baseline to progressive
-JPEG and vice versa.  In support of jpegtran, the library now allows lossless
-reading and writing of JPEG files as DCT coefficient arrays.  This ability
-may be of use in other applications.
-
-Notes for programmers:
-* We changed jpeg_start_decompress() to be able to suspend; this makes all
-decoding modes available to suspending-input applications.  However,
-existing applications that use suspending input will need to be changed
-to check the return value from jpeg_start_decompress().  You don't need to
-do anything if you don't use a suspending data source.
-* We changed the interface to the virtual array routines: access_virt_array
-routines now take a count of the number of rows to access this time.  The
-last parameter to request_virt_array routines is now interpreted as the
-maximum number of rows that may be accessed at once, but not necessarily
-the height of every access.
-
-
-Version 5b  15-Mar-95
----------------------
-
-Correct bugs with grayscale images having v_samp_factor > 1.
-
-jpeg_write_raw_data() now supports output suspension.
-
-Correct bugs in "configure" script for case of compiling in
-a directory other than the one containing the source files.
-
-Repair bug in jquant1.c: sometimes didn't use as many colors as it could.
-
-Borland C makefile and jconfig file work under either MS-DOS or OS/2.
-
-Miscellaneous improvements to documentation.
-
-
-Version 5a  7-Dec-94
---------------------
-
-Changed color conversion roundoff behavior so that grayscale values are
-represented exactly.  (This causes test image files to change.)
-
-Make ordered dither use 16x16 instead of 4x4 pattern for a small quality
-improvement.
-
-New configure script based on latest GNU Autoconf.
-Fix configure script to handle CFLAGS correctly.
-Rename *.auto files to *.cfg, so that configure script still works if
-file names have been truncated for DOS.
-
-Fix bug in rdbmp.c: didn't allow for extra data between header and image.
-
-Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data.
-
-Fix several bugs in rdrle.c.
-
-NEED_SHORT_EXTERNAL_NAMES option was broken.
-
-Revise jerror.h/jerror.c for more flexibility in message table.
-
-Repair oversight in jmemname.c NO_MKTEMP case: file could be there
-but unreadable.
-
-
-Version 5  24-Sep-94
---------------------
-
-Version 5 represents a nearly complete redesign and rewrite of the IJG
-software.  Major user-visible changes include:
-  * Automatic configuration simplifies installation for most Unix systems.
-  * A range of speed vs. image quality tradeoffs are supported.
-    This includes resizing of an image during decompression: scaling down
-    by a factor of 1/2, 1/4, or 1/8 is handled very efficiently.
-  * New programs rdjpgcom and wrjpgcom allow insertion and extraction
-    of text comments in a JPEG file.
-
-The application programmer's interface to the library has changed completely.
-Notable improvements include:
-  * We have eliminated the use of callback routines for handling the
-    uncompressed image data.  The application now sees the library as a
-    set of routines that it calls to read or write image data on a
-    scanline-by-scanline basis.
-  * The application image data is represented in a conventional interleaved-
-    pixel format, rather than as a separate array for each color channel.
-    This can save a copying step in many programs.
-  * The handling of compressed data has been cleaned up: the application can
-    supply routines to source or sink the compressed data.  It is possible to
-    suspend processing on source/sink buffer overrun, although this is not
-    supported in all operating modes.
-  * All static state has been eliminated from the library, so that multiple
-    instances of compression or decompression can be active concurrently.
-  * JPEG abbreviated datastream formats are supported, ie, quantization and
-    Huffman tables can be stored separately from the image data.
-  * And not only that, but the documentation of the library has improved
-    considerably!
-
-
-The last widely used release before the version 5 rewrite was version 4A of
-18-Feb-93.  Change logs before that point have been discarded, since they
-are not of much interest after the rewrite.
diff --git a/dep/libjpeg/include/jconfig.h b/dep/libjpeg/include/jconfig.h
deleted file mode 100644
index d1710ae7d..000000000
--- a/dep/libjpeg/include/jconfig.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * jconfig.txt
- *
- * Copyright (C) 1991-1994, Thomas G. Lane.
- * Modified 2009-2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file documents the configuration options that are required to
- * customize the JPEG software for a particular system.
- *
- * The actual configuration options for a particular installation are stored
- * in jconfig.h.  On many machines, jconfig.h can be generated automatically
- * or copied from one of the "canned" jconfig files that we supply.  But if
- * you need to generate a jconfig.h file by hand, this file tells you how.
- *
- * DO NOT EDIT THIS FILE --- IT WON'T ACCOMPLISH ANYTHING.
- * EDIT A COPY NAMED JCONFIG.H.
- */
-
-
-/*
- * These symbols indicate the properties of your machine or compiler.
- * #define the symbol if yes, #undef it if no.
- */
-
-/* Does your compiler support function prototypes?
- * (If not, you also need to use ansi2knr, see install.txt)
- */
-#define HAVE_PROTOTYPES
-
-/* Does your compiler support the declaration "unsigned char" ?
- * How about "unsigned short" ?
- */
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-
-/* Define "void" as "char" if your compiler doesn't know about type void.
- * NOTE: be sure to define void such that "void *" represents the most general
- * pointer type, e.g., that returned by malloc().
- */
-/* #define void char */
-
-/* Define "const" as empty if your compiler doesn't know the "const" keyword.
- */
-/* #define const */
-
-/* Define this if an ordinary "char" type is unsigned.
- * If you're not sure, leaving it undefined will work at some cost in speed.
- * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal.
- */
-#undef CHAR_IS_UNSIGNED
-
-/* Define this if your system has an ANSI-conforming <stddef.h> file.
- */
-#define HAVE_STDDEF_H
-
-/* Define this if your system has an ANSI-conforming <stdlib.h> file.
- */
-#define HAVE_STDLIB_H
-
-/* Define this if your system does not have an ANSI/SysV <string.h>,
- * but does have a BSD-style <strings.h>.
- */
-#undef NEED_BSD_STRINGS
-
-/* Define this if your system does not provide typedef size_t in any of the
- * ANSI-standard places (stddef.h, stdlib.h, or stdio.h), but places it in
- * <sys/types.h> instead.
- */
-#undef NEED_SYS_TYPES_H
-
-/* For 80x86 machines, you need to define NEED_FAR_POINTERS,
- * unless you are using a large-data memory model or 80386 flat-memory mode.
- * On less brain-damaged CPUs this symbol must not be defined.
- * (Defining this symbol causes large data structures to be referenced through
- * "far" pointers and to be allocated with a special version of malloc.)
- */
-#undef NEED_FAR_POINTERS
-
-/* Define this if your linker needs global names to be unique in less
- * than the first 15 characters.
- */
-#undef NEED_SHORT_EXTERNAL_NAMES
-
-/* Although a real ANSI C compiler can deal perfectly well with pointers to
- * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI
- * and pseudo-ANSI compilers get confused.  To keep one of these bozos happy,
- * define INCOMPLETE_TYPES_BROKEN.  This is not recommended unless you
- * actually get "missing structure definition" warnings or errors while
- * compiling the JPEG code.
- */
-#undef INCOMPLETE_TYPES_BROKEN
-
-/* Define "boolean" as unsigned char, not enum, on Windows systems.
- */
-#ifdef _WIN32
-#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
-#endif
-#ifndef TRUE
-#define TRUE	1
-#endif
-#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
-#endif
-
-
-/*
- * The following options affect code selection within the JPEG library,
- * but they don't need to be visible to applications using the library.
- * To minimize application namespace pollution, the symbols won't be
- * defined unless JPEG_INTERNALS has been defined.
- */
-
-#ifdef JPEG_INTERNALS
-
-/* Define this if your compiler implements ">>" on signed values as a logical
- * (unsigned) shift; leave it undefined if ">>" is a signed (arithmetic) shift,
- * which is the normal and rational definition.
- */
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-
-#endif /* JPEG_INTERNALS */
-
-
-/*
- * The remaining options do not affect the JPEG library proper,
- * but only the sample applications cjpeg/djpeg (see cjpeg.c, djpeg.c).
- * Other applications can ignore these.
- */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-/* These defines indicate which image (non-JPEG) file formats are allowed. */
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-/* Define this if you want to name both input and output files on the command
- * line, rather than using stdout and optionally stdin.  You MUST do this if
- * your system can't cope with binary I/O to stdin/stdout.  See comments at
- * head of cjpeg.c or djpeg.c.
- */
-#undef TWO_FILE_COMMANDLINE
-
-/* Define this if your system needs explicit cleanup of temporary files.
- * This is crucial under MS-DOS, where the temporary "files" may be areas
- * of extended memory; on most other systems it's not as important.
- */
-#undef NEED_SIGNAL_CATCHER
-
-/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb").
- * This is necessary on systems that distinguish text files from binary files,
- * and is harmless on most systems that don't.  If you have one of the rare
- * systems that complains about the "b" spec, define this symbol.
- */
-#undef DONT_USE_B_MODE
-
-/* Define this if you want percent-done progress reports from cjpeg/djpeg.
- */
-#undef PROGRESS_REPORT
-
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/dep/libjpeg/include/jerror.h b/dep/libjpeg/include/jerror.h
deleted file mode 100644
index db608b9c0..000000000
--- a/dep/libjpeg/include/jerror.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * jerror.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 1997-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the error and message codes for the JPEG library.
- * Edit this file to add new codes, or to translate the message strings to
- * some other language.
- * A set of error-reporting macros are defined too.  Some applications using
- * the JPEG library may wish to include this file to get the error codes
- * and/or the macros.
- */
-
-/*
- * To define the enum list of message codes, include this file without
- * defining macro JMESSAGE.  To create a message string table, include it
- * again with a suitable JMESSAGE definition (see jerror.c for an example).
- */
-#ifndef JMESSAGE
-#ifndef JERROR_H
-/* First time through, define the enum list */
-#define JMAKE_ENUM_LIST
-#else
-/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
-#define JMESSAGE(code,string)
-#endif /* JERROR_H */
-#endif /* JMESSAGE */
-
-#ifdef JMAKE_ENUM_LIST
-
-typedef enum {
-
-#define JMESSAGE(code,string)	code ,
-
-#endif /* JMAKE_ENUM_LIST */
-
-JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
-
-/* For maintenance convenience, list is alphabetical by message code name */
-JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
-JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
-JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
-JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
-JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
-JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
-JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
-JMESSAGE(JERR_BAD_DROP_SAMPLING,
-	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
-JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
-JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
-JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
-JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
-JMESSAGE(JERR_BAD_LIB_VERSION,
-	 "Wrong JPEG library version: library is %d, caller expects %d")
-JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
-JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
-JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
-JMESSAGE(JERR_BAD_PROGRESSION,
-	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
-JMESSAGE(JERR_BAD_PROG_SCRIPT,
-	 "Invalid progressive parameters at scan script entry %d")
-JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
-JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
-JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
-JMESSAGE(JERR_BAD_STRUCT_SIZE,
-	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
-JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
-JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
-JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
-JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
-JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
-JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
-JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
-JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
-JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
-JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
-JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
-JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
-JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
-JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
-JMESSAGE(JERR_FILE_READ, "Input file read error")
-JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
-JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
-JMESSAGE(JERR_HUFF_CLEN_OUTOFBOUNDS, "Huffman code size table out of bounds")
-JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
-JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
-JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
-JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
-JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
-	 "Cannot transcode due to multiple use of quantization table %d")
-JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
-JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
-JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
-JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
-JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
-JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
-JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
-JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
-JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
-JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
-JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
-JMESSAGE(JERR_QUANT_COMPONENTS,
-	 "Cannot quantize more than %d color components")
-JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
-JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
-JMESSAGE(JERR_SOF_BEFORE, "Invalid JPEG file structure: %s before SOF")
-JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
-JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
-JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
-JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
-JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
-JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
-JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
-JMESSAGE(JERR_TFILE_WRITE,
-	 "Write failed on temporary file --- out of disk space?")
-JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
-JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
-JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
-JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
-JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
-JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
-JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
-JMESSAGE(JMSG_VERSION, JVERSION)
-JMESSAGE(JTRC_16BIT_TABLES,
-	 "Caution: quantization tables are too coarse for baseline JPEG")
-JMESSAGE(JTRC_ADOBE,
-	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
-JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
-JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
-JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
-JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
-JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
-JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
-JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
-JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
-JMESSAGE(JTRC_EOI, "End Of Image")
-JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
-JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
-JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
-	 "Warning: thumbnail image size does not match data length %u")
-JMESSAGE(JTRC_JFIF_EXTENSION,
-	 "JFIF extension marker: type 0x%02x, length %u")
-JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
-JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
-JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
-JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
-JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
-JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
-JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
-JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
-JMESSAGE(JTRC_RST, "RST%d")
-JMESSAGE(JTRC_SMOOTH_NOTIMPL,
-	 "Smoothing not supported with nonstandard sampling ratios")
-JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
-JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
-JMESSAGE(JTRC_SOI, "Start of Image")
-JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
-JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
-JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
-JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
-JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
-JMESSAGE(JTRC_THUMB_JPEG,
-	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_PALETTE,
-	 "JFIF extension marker: palette thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_RGB,
-	 "JFIF extension marker: RGB thumbnail image, length %u")
-JMESSAGE(JTRC_UNKNOWN_IDS,
-	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
-JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
-JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
-JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
-JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
-JMESSAGE(JWRN_BOGUS_PROGRESSION,
-	 "Inconsistent progression sequence for component %d coefficient %d")
-JMESSAGE(JWRN_EXTRANEOUS_DATA,
-	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
-JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
-JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
-JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
-JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
-JMESSAGE(JWRN_MUST_RESYNC,
-	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
-JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
-JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
-
-#ifdef JMAKE_ENUM_LIST
-
-  JMSG_LASTMSGCODE
-} J_MESSAGE_CODE;
-
-#undef JMAKE_ENUM_LIST
-#endif /* JMAKE_ENUM_LIST */
-
-/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
-#undef JMESSAGE
-
-
-#ifndef JERROR_H
-#define JERROR_H
-
-/* Macros to simplify using the error and trace message stuff */
-/* The first parameter is either type of cinfo pointer */
-
-/* Fatal errors (print message and exit) */
-#define ERREXIT(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT3(cinfo,code,p1,p2,p3)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (cinfo)->err->msg_parm.i[4] = (p5), \
-   (cinfo)->err->msg_parm.i[5] = (p6), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXITS(cinfo,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-
-#define MAKESTMT(stuff)		do { stuff } while (0)
-
-/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
-#define WARNMS(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-
-/* Informational/debugging messages */
-#define TRACEMS(cinfo,lvl,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS1(cinfo,lvl,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMSS(cinfo,lvl,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-
-#endif /* JERROR_H */
diff --git a/dep/libjpeg/include/jmorecfg.h b/dep/libjpeg/include/jmorecfg.h
deleted file mode 100644
index 4638d6af2..000000000
--- a/dep/libjpeg/include/jmorecfg.h
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * jmorecfg.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains additional configuration options that customize the
- * JPEG software for special applications or support machine-dependent
- * optimizations.  Most users will not need to touch this file.
- */
-
-
-/*
- * Define BITS_IN_JSAMPLE as either
- *   8   for 8-bit sample values (the usual setting)
- *   9   for 9-bit sample values
- *   10  for 10-bit sample values
- *   11  for 11-bit sample values
- *   12  for 12-bit sample values
- * Only 8, 9, 10, 11, and 12 bits sample data precision are supported for
- * full-feature DCT processing.  Further depths up to 16-bit may be added
- * later for the lossless modes of operation.
- * Run-time selection and conversion of data precision will be added later
- * and are currently not supported, sorry.
- * Exception:  The transcoding part (jpegtran) supports all settings in a
- * single instance, since it operates on the level of DCT coefficients and
- * not sample values.  The DCT coefficients are of the same type (16 bits)
- * in all cases (see below).
- */
-
-#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
-
-
-/*
- * Maximum number of components (color channels) allowed in JPEG image.
- * To meet the letter of the JPEG spec, set this to 255.  However, darn
- * few applications need more than 4 channels (maybe 5 for CMYK + alpha
- * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
- * really short on memory.  (Each allowed component costs a hundred or so
- * bytes of storage, whether actually used in an image or not.)
- */
-
-#define MAX_COMPONENTS  10	/* maximum number of image components */
-
-
-/*
- * Basic data types.
- * You may need to change these if you have a machine with unusual data
- * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
- * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
- * but it had better be at least 16.
- */
-
-/* Representation of a single sample (pixel element value).
- * We frequently allocate large arrays of these, so it's important to keep
- * them small.  But if you have memory to burn and access to char or short
- * arrays is very slow on your hardware, you might want to change these.
- */
-
-#if BITS_IN_JSAMPLE == 8
-/* JSAMPLE should be the smallest type that will hold the values 0..255.
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JSAMPLE;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJSAMPLE(value)  ((int) (value))
-#else
-#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-#define MAXJSAMPLE	255
-#define CENTERJSAMPLE	128
-
-#endif /* BITS_IN_JSAMPLE == 8 */
-
-
-#if BITS_IN_JSAMPLE == 9
-/* JSAMPLE should be the smallest type that will hold the values 0..511.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	511
-#define CENTERJSAMPLE	256
-
-#endif /* BITS_IN_JSAMPLE == 9 */
-
-
-#if BITS_IN_JSAMPLE == 10
-/* JSAMPLE should be the smallest type that will hold the values 0..1023.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	1023
-#define CENTERJSAMPLE	512
-
-#endif /* BITS_IN_JSAMPLE == 10 */
-
-
-#if BITS_IN_JSAMPLE == 11
-/* JSAMPLE should be the smallest type that will hold the values 0..2047.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	2047
-#define CENTERJSAMPLE	1024
-
-#endif /* BITS_IN_JSAMPLE == 11 */
-
-
-#if BITS_IN_JSAMPLE == 12
-/* JSAMPLE should be the smallest type that will hold the values 0..4095.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	4095
-#define CENTERJSAMPLE	2048
-
-#endif /* BITS_IN_JSAMPLE == 12 */
-
-
-/* Representation of a DCT frequency coefficient.
- * This should be a signed value of at least 16 bits; "short" is usually OK.
- * Again, we allocate large arrays of these, but you can change to int
- * if you have memory to burn and "short" is really slow.
- */
-
-typedef short JCOEF;
-
-
-/* Compressed datastreams are represented as arrays of JOCTET.
- * These must be EXACTLY 8 bits wide, at least once they are written to
- * external storage.  Note that when using the stdio data source/destination
- * managers, this is also the data type passed to fread/fwrite.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JOCTET;
-#define GETJOCTET(value)  (value)
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JOCTET;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJOCTET(value)  (value)
-#else
-#define GETJOCTET(value)  ((value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-
-/* These typedefs are used for various table entries and so forth.
- * They must be at least as wide as specified; but making them too big
- * won't cost a huge amount of memory, so we don't provide special
- * extraction code like we did for JSAMPLE.  (In other words, these
- * typedefs live at a different point on the speed/space tradeoff curve.)
- */
-
-/* UINT8 must hold at least the values 0..255. */
-
-#ifdef HAVE_UNSIGNED_CHAR
-typedef unsigned char UINT8;
-#else /* not HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
-typedef char UINT8;
-#else /* not CHAR_IS_UNSIGNED */
-typedef short UINT8;
-#endif /* CHAR_IS_UNSIGNED */
-#endif /* HAVE_UNSIGNED_CHAR */
-
-/* UINT16 must hold at least the values 0..65535. */
-
-#ifdef HAVE_UNSIGNED_SHORT
-typedef unsigned short UINT16;
-#else /* not HAVE_UNSIGNED_SHORT */
-typedef unsigned int UINT16;
-#endif /* HAVE_UNSIGNED_SHORT */
-
-/* INT16 must hold at least the values -32768..32767. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
-typedef short INT16;
-#endif
-
-/* INT32 must hold at least signed 32-bit values. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
-#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
-#ifndef _BASETSD_H		/* MinGW is slightly different */
-#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
-typedef long INT32;
-#endif
-#endif
-#endif
-#endif
-
-/* Datatype used for image dimensions.  The JPEG standard only supports
- * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
- * "unsigned int" is sufficient on all machines.  However, if you need to
- * handle larger images and you don't mind deviating from the spec, you
- * can change this datatype.
- */
-
-typedef unsigned int JDIMENSION;
-
-#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
-
-
-/* These macros are used in all function definitions and extern declarations.
- * You could modify them if you need to change function linkage conventions;
- * in particular, you'll need to do that to make the library a Windows DLL.
- * Another application is to make all functions global for use with debuggers
- * or code profilers that require it.
- */
-
-/* a function called through method pointers: */
-#define METHODDEF(type)		static type
-/* a function used only in its module: */
-#define LOCAL(type)		static type
-/* a function referenced thru EXTERNs: */
-#define GLOBAL(type)		type
-/* a reference to a GLOBAL function: */
-#define EXTERN(type)		extern type
-
-
-/* This macro is used to declare a "method", that is, a function pointer.
- * We want to supply prototype parameters if the compiler can cope.
- * Note that the arglist parameter must be parenthesized!
- * Again, you can customize this if you need special linkage keywords.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-#else
-#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-#endif
-
-
-/* The noreturn type identifier is used to declare functions
- * which cannot return.
- * Compilers can thus create more optimized code and perform
- * better checks for warnings and errors.
- * Static analyzer tools can make improved inferences about
- * execution paths and are prevented from giving false alerts.
- *
- * Unfortunately, the proposed specifications of corresponding
- * extensions in the Dec 2011 ISO C standard revision (C11),
- * GCC, MSVC, etc. are not viable.
- * Thus we introduce a user defined type to declare noreturn
- * functions at least for clarity.  A proper compiler would
- * have a suitable noreturn type to match in place of void.
- */
-
-#ifndef HAVE_NORETURN_T
-typedef void noreturn_t;
-#endif
-
-
-/* Here is the pseudo-keyword for declaring pointers that must be "far"
- * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
- * by just saying "FAR *" where such a pointer is needed.  In a few places
- * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
- */
-
-#ifndef FAR
-#ifdef NEED_FAR_POINTERS
-#define FAR  far
-#else
-#define FAR
-#endif
-#endif
-
-
-/*
- * On a few systems, type boolean and/or its values FALSE, TRUE may appear
- * in standard header files.  Or you may have conflicts with application-
- * specific header files that you want to include together with these files.
- * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
- */
-
-#ifndef HAVE_BOOLEAN
-#if defined FALSE || defined TRUE || defined QGLOBAL_H
-/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
-typedef int boolean;
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
-#endif
-#ifndef TRUE
-#define TRUE	1
-#endif
-#else
-typedef enum { FALSE = 0, TRUE = 1 } boolean;
-#endif
-#endif
-
-
-/*
- * The remaining options affect code selection within the JPEG library,
- * but they don't need to be visible to most applications using the library.
- * To minimize application namespace pollution, the symbols won't be
- * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
- */
-
-#ifdef JPEG_INTERNALS
-#define JPEG_INTERNAL_OPTIONS
-#endif
-
-#ifdef JPEG_INTERNAL_OPTIONS
-
-
-/*
- * These defines indicate whether to include various optional functions.
- * Undefining some of these symbols will produce a smaller but less capable
- * library.  Note that you can leave certain source files out of the
- * compilation/linking process if you've #undef'd the corresponding symbols.
- * (You may HAVE to do that if your compiler doesn't like null source files.)
- */
-
-/* Capability options common to encoder and decoder: */
-
-#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
-
-/* Encoder capability options: */
-
-#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
-#define DCT_SCALING_SUPPORTED	/* Input rescaling via DCT? (Requires DCT_ISLOW) */
-#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
-/* Note: if you selected more than 8-bit data precision, it is dangerous to
- * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
- * good for 8-bit precision, so arithmetic coding is recommended for higher
- * precision.  The Huffman encoder normally uses entropy optimization to
- * compute usable tables for higher precision.  Otherwise, you'll have to
- * supply different default Huffman tables.
- * The exact same statements apply for progressive JPEG: the default tables
- * don't work for progressive mode.  (This may get fixed, however.)
- */
-#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
-
-/* Decoder capability options: */
-
-#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
-#define IDCT_SCALING_SUPPORTED	/* Output rescaling via IDCT? (Requires DCT_ISLOW) */
-#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
-#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
-#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
-#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
-#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
-#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
-
-/* more capability options later, no doubt */
-
-
-/*
- * Ordering of RGB data in scanlines passed to or from the application.
- * If your application wants to deal with data in the order B,G,R, just
- * #define JPEG_USE_RGB_CUSTOM in jconfig.h, or define your own custom
- * order in jconfig.h and #define JPEG_HAVE_RGB_CUSTOM.
- * You can also deal with formats such as R,G,B,X (one extra byte per pixel)
- * by changing RGB_PIXELSIZE.
- * Note that changing the offsets will also change
- * the order in which colormap data is organized.
- * RESTRICTIONS:
- * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
- * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
- *    is not 3 (they don't understand about dummy color components!).
- *    So you can't use color quantization if you change that value.
- */
-
-#ifndef JPEG_HAVE_RGB_CUSTOM
-#ifdef JPEG_USE_RGB_CUSTOM
-#define RGB_RED		2	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	0	/* Offset of Blue */
-#else
-#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	2	/* Offset of Blue */
-#endif
-#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
-#endif
-
-
-/* Definitions for speed-related optimizations. */
-
-
-/* If your compiler supports inline functions, define INLINE
- * as the inline keyword; otherwise define it as empty.
- */
-
-#ifndef INLINE
-#ifdef __GNUC__			/* for instance, GNU C knows about inline */
-#define INLINE __inline__
-#endif
-#ifndef INLINE
-#define INLINE			/* default is to define it as empty */
-#endif
-#endif
-
-
-/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
- * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
- * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
- */
-
-#ifndef MULTIPLIER
-#define MULTIPLIER  int		/* type for fastest integer multiply */
-#endif
-
-
-/* FAST_FLOAT should be either float or double, whichever is done faster
- * by your compiler.  (Note that this type is only used in the floating point
- * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
- * Typically, float is faster in ANSI C compilers, while double is faster in
- * pre-ANSI compilers (because they insist on converting to double anyway).
- * The code below therefore chooses float if we have ANSI-style prototypes.
- */
-
-#ifndef FAST_FLOAT
-#ifdef HAVE_PROTOTYPES
-#define FAST_FLOAT  float
-#else
-#define FAST_FLOAT  double
-#endif
-#endif
-
-#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/dep/libjpeg/include/jpegint.h b/dep/libjpeg/include/jpegint.h
deleted file mode 100644
index 3528bff5b..000000000
--- a/dep/libjpeg/include/jpegint.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * jpegint.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides common declarations for the various JPEG modules.
- * These declarations are considered internal to the JPEG library; most
- * applications using the library shouldn't need to include this file.
- */
-
-
-/* Declarations for both compression & decompression */
-
-typedef enum {			/* Operating modes for buffer controllers */
-	JBUF_PASS_THRU,		/* Plain stripwise operation */
-	/* Remaining modes require a full-image buffer to have been created */
-	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
-	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
-	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
-} J_BUF_MODE;
-
-/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
-#define CSTATE_START	100	/* after create_compress */
-#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
-#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
-#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
-#define DSTATE_START	200	/* after create_decompress */
-#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
-#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
-#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
-#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
-#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
-#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
-#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
-#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
-#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
-#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
-
-
-/* Declarations for compression modules */
-
-/* Master control module */
-struct jpeg_comp_master {
-  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean call_pass_startup;	/* True if pass_startup must be called */
-  boolean is_last_pass;		/* True during last pass */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_c_main_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_compress_ptr cinfo,
-			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			       JDIMENSION in_rows_avail));
-};
-
-/* Compression preprocessing (downsampling input buffer control) */
-struct jpeg_c_prep_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
-				   JSAMPARRAY input_buf,
-				   JDIMENSION *in_row_ctr,
-				   JDIMENSION in_rows_avail,
-				   JSAMPIMAGE output_buf,
-				   JDIMENSION *out_row_group_ctr,
-				   JDIMENSION out_row_groups_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_c_coef_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
-				   JSAMPIMAGE input_buf));
-};
-
-/* Colorspace conversion */
-struct jpeg_color_converter {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
-				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-				JDIMENSION output_row, int num_rows));
-};
-
-/* Downsampling */
-struct jpeg_downsampler {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, downsample, (j_compress_ptr cinfo,
-			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-			     JSAMPIMAGE output_buf,
-			     JDIMENSION out_row_group_index));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Forward DCT (also controls coefficient quantization) */
-typedef JMETHOD(void, forward_DCT_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		 JDIMENSION start_col, JDIMENSION num_blocks));
-
-struct jpeg_forward_dct {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  /* It is useful to allow each component to have a separate FDCT method. */
-  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
-};
-
-/* Entropy encoding */
-struct jpeg_entropy_encoder {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
-  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKARRAY MCU_data));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-};
-
-/* Marker writing */
-struct jpeg_marker_writer {
-  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
-  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
-  /* These routines are exported to allow insertion of extra markers */
-  /* Probably only COM and APPn markers should be written this way */
-  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
-				      unsigned int datalen));
-  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
-};
-
-
-/* Declarations for decompression modules */
-
-/* Master control module */
-struct jpeg_decomp_master {
-  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
-};
-
-/* Input control module */
-struct jpeg_input_controller {
-  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
-  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean has_multiple_scans;	/* True if file has multiple scans */
-  boolean eoi_reached;		/* True when EOI has been consumed */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_d_main_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
-			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			       JDIMENSION out_rows_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_d_coef_controller {
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
-				 JSAMPIMAGE output_buf));
-  /* Pointer to array of coefficient virtual arrays, or NULL if none */
-  jvirt_barray_ptr *coef_arrays;
-};
-
-/* Decompression postprocessing (color quantization buffer control) */
-struct jpeg_d_post_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
-				    JSAMPIMAGE input_buf,
-				    JDIMENSION *in_row_group_ctr,
-				    JDIMENSION in_row_groups_avail,
-				    JSAMPARRAY output_buf,
-				    JDIMENSION *out_row_ctr,
-				    JDIMENSION out_rows_avail));
-};
-
-/* Marker reading & parsing */
-struct jpeg_marker_reader {
-  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
-  /* Read markers until SOS or EOI.
-   * Returns same codes as are defined for jpeg_consume_input:
-   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
-   */
-  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
-  /* Read a restart marker --- exported for use by entropy decoder only */
-  jpeg_marker_parser_method read_restart_marker;
-
-  /* State of marker reader --- nominally internal, but applications
-   * supplying COM or APPn handlers might like to know the state.
-   */
-  boolean saw_SOI;		/* found SOI? */
-  boolean saw_SOF;		/* found SOF? */
-  int next_restart_num;		/* next restart number expected (0-7) */
-  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
-};
-
-/* Entropy decoding */
-struct jpeg_entropy_decoder {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-};
-
-/* Inverse DCT (also performs dequantization) */
-typedef JMETHOD(void, inverse_DCT_method_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col));
-
-struct jpeg_inverse_dct {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  /* It is useful to allow each component to have a separate IDCT method. */
-  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
-};
-
-/* Upsampling (note that upsampler must also call color converter) */
-struct jpeg_upsampler {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf,
-			   JDIMENSION *in_row_group_ctr,
-			   JDIMENSION in_row_groups_avail,
-			   JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Colorspace conversion */
-struct jpeg_color_deconverter {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
-				JSAMPIMAGE input_buf, JDIMENSION input_row,
-				JSAMPARRAY output_buf, int num_rows));
-};
-
-/* Color quantization or color precision reduction */
-struct jpeg_color_quantizer {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
-  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
-				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
-				 int num_rows));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
-};
-
-
-/* Definition of range extension bits for decompression processes.
- * See the comments with prepare_range_limit_table (in jdmaster.c)
- * for more info.
- * The recommended default value for normal applications is 2.
- * Applications with special requirements may use a different value.
- * For example, Ghostscript wants to use 3 for proper handling of
- * wacky images with oversize coefficient values.
- */
-
-#define RANGE_BITS	2
-#define RANGE_CENTER	(CENTERJSAMPLE << RANGE_BITS)
-
-
-/* Miscellaneous useful macros */
-
-#undef MAX
-#define MAX(a,b)	((a) > (b) ? (a) : (b))
-#undef MIN
-#define MIN(a,b)	((a) < (b) ? (a) : (b))
-
-
-/* We assume that right shift corresponds to signed division by 2 with
- * rounding towards minus infinity.  This is correct for typical "arithmetic
- * shift" instructions that shift in copies of the sign bit.  But some
- * C compilers implement >> with an unsigned shift.  For these machines you
- * must define RIGHT_SHIFT_IS_UNSIGNED.
- * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
- * It is only applied with constant shift counts.  SHIFT_TEMPS must be
- * included in the variables of any routine using RIGHT_SHIFT.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS	INT32 shift_temp;
-#define RIGHT_SHIFT(x,shft)  \
-	((shift_temp = (x)) < 0 ? \
-	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
-	 (shift_temp >> (shft)))
-#else
-#define SHIFT_TEMPS
-#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-/* Descale and correctly round an INT32 value that's scaled by N bits.
- * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
- * the fudge factor is correct for either sign of X.
- */
-
-#define DESCALE(x,n)	RIGHT_SHIFT((x) + ((INT32) 1 << ((n)-1)), n)
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_compress_master	jICompress
-#define jinit_c_master_control	jICMaster
-#define jinit_c_main_controller	jICMainC
-#define jinit_c_prep_controller	jICPrepC
-#define jinit_c_coef_controller	jICCoefC
-#define jinit_color_converter	jICColor
-#define jinit_downsampler	jIDownsampler
-#define jinit_forward_dct	jIFDCT
-#define jinit_huff_encoder	jIHEncoder
-#define jinit_arith_encoder	jIAEncoder
-#define jinit_marker_writer	jIMWriter
-#define jinit_master_decompress	jIDMaster
-#define jinit_d_main_controller	jIDMainC
-#define jinit_d_coef_controller	jIDCoefC
-#define jinit_d_post_controller	jIDPostC
-#define jinit_input_controller	jIInCtlr
-#define jinit_marker_reader	jIMReader
-#define jinit_huff_decoder	jIHDecoder
-#define jinit_arith_decoder	jIADecoder
-#define jinit_inverse_dct	jIIDCT
-#define jinit_upsampler		jIUpsampler
-#define jinit_color_deconverter	jIDColor
-#define jinit_1pass_quantizer	jI1Quant
-#define jinit_2pass_quantizer	jI2Quant
-#define jinit_merged_upsampler	jIMUpsampler
-#define jinit_memory_mgr	jIMemMgr
-#define jdiv_round_up		jDivRound
-#define jround_up		jRound
-#define jzero_far		jZeroFar
-#define jcopy_sample_rows	jCopySamples
-#define jcopy_block_row		jCopyBlocks
-#define jpeg_zigzag_order	jZIGTable
-#define jpeg_natural_order	jZAGTable
-#define jpeg_natural_order7	jZAG7Table
-#define jpeg_natural_order6	jZAG6Table
-#define jpeg_natural_order5	jZAG5Table
-#define jpeg_natural_order4	jZAG4Table
-#define jpeg_natural_order3	jZAG3Table
-#define jpeg_natural_order2	jZAG2Table
-#define jpeg_aritab		jAriTab
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines in jutils.c do it the hard way.
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMZERO(target,size)	MEMZERO(target,size)
-#else				/* 80x86 case */
-#ifdef USE_FMEM
-#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
-#else
-EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
-#define FMEMZERO(target,size)	jzero_far(target, size)
-#endif
-#endif
-
-
-/* Compression module initialization routines */
-EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
-					 boolean transcode_only));
-EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
-/* Decompression module initialization routines */
-EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
-/* Memory manager initialization */
-EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
-
-/* Utility routines in jutils.c */
-EXTERN(long) jdiv_round_up JPP((long a, long b));
-EXTERN(long) jround_up JPP((long a, long b));
-EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array,
-				    JSAMPARRAY output_array,
-				    int num_rows, JDIMENSION num_cols));
-EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
-				  JDIMENSION num_blocks));
-/* Constant tables in jutils.c */
-#if 0				/* This table is not actually needed in v6a */
-extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
-#endif
-extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
-extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */
-extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */
-extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */
-extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */
-extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */
-extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */
-
-/* Arithmetic coding probability estimation tables in jaricom.c */
-extern const INT32 jpeg_aritab[];
-
-/* Suppress undefined-structure complaints if necessary. */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-#endif
-#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/dep/libjpeg/include/jpeglib.h b/dep/libjpeg/include/jpeglib.h
deleted file mode 100644
index e7e15ab2c..000000000
--- a/dep/libjpeg/include/jpeglib.h
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * jpeglib.h
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2002-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the application interface for the JPEG library.
- * Most applications using the library need only include this file,
- * and perhaps jerror.h if they want to know the exact error codes.
- */
-
-#ifndef JPEGLIB_H
-#define JPEGLIB_H
-
-/*
- * First we include the configuration files that record how this
- * installation of the JPEG library is set up.  jconfig.h can be
- * generated automatically for many systems.  jmorecfg.h contains
- * manual configuration options that most people need not worry about.
- */
-
-#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
-#include "jconfig.h"		/* widely used configuration options */
-#endif
-#include "jmorecfg.h"		/* seldom changed options */
-
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-extern "C" {
-#endif
-#endif
-
-/* Version IDs for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 90".
- */
-
-#define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
-#define JPEG_LIB_VERSION_MAJOR  9
-#define JPEG_LIB_VERSION_MINOR  6
-
-
-/* Various constants determining the sizes of things.
- * All of these are specified by the JPEG standard,
- * so don't change them if you want to be compatible.
- */
-
-#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
-#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
-#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
-#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
-#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
-#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
-#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
-/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
- * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
- * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
- * to handle it.  We even let you do this from the jconfig.h file.  However,
- * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
- * sometimes emits noncompliant files doesn't mean you should too.
- */
-#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
-#ifndef D_MAX_BLOCKS_IN_MCU
-#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
-#endif
-
-
-/* Data structures for images (arrays of samples and of DCT coefficients).
- * On 80x86 machines, the image arrays are too big for near pointers,
- * but the pointer arrays can fit in near memory.
- */
-
-typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
-typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
-typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
-
-typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
-typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
-typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
-typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
-
-typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
-
-
-/* Types for JPEG compression parameters and working tables. */
-
-
-/* DCT coefficient quantization tables. */
-
-typedef struct {
-  /* This array gives the coefficient quantizers in natural array order
-   * (not the zigzag order in which they are stored in a JPEG DQT marker).
-   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
-   */
-  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JQUANT_TBL;
-
-
-/* Huffman coding tables. */
-
-typedef struct {
-  /* These two fields directly represent the contents of a JPEG DHT marker */
-  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
-				/* length k bits; bits[0] is unused */
-  UINT8 huffval[256];		/* The symbols, in order of incr code length */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JHUFF_TBL;
-
-
-/* Basic info about one component (color channel). */
-
-typedef struct {
-  /* These values are fixed over the whole image. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOF marker. */
-  int component_id;		/* identifier for this component (0..255) */
-  int component_index;		/* its index in SOF or cinfo->comp_info[] */
-  int h_samp_factor;		/* horizontal sampling factor (1..4) */
-  int v_samp_factor;		/* vertical sampling factor (1..4) */
-  int quant_tbl_no;		/* quantization table selector (0..3) */
-  /* These values may vary between scans. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOS marker. */
-  /* The decompressor output side may not use these variables. */
-  int dc_tbl_no;		/* DC entropy table selector (0..3) */
-  int ac_tbl_no;		/* AC entropy table selector (0..3) */
-
-  /* Remaining fields should be treated as private by applications. */
-
-  /* These values are computed during compression or decompression startup: */
-  /* Component's size in DCT blocks.
-   * Any dummy blocks added to complete an MCU are not counted; therefore
-   * these values do not depend on whether a scan is interleaved or not.
-   */
-  JDIMENSION width_in_blocks;
-  JDIMENSION height_in_blocks;
-  /* Size of a DCT block in samples,
-   * reflecting any scaling we choose to apply during the DCT step.
-   * Values from 1 to 16 are supported.
-   * Note that different components may receive different DCT scalings.
-   */
-  int DCT_h_scaled_size;
-  int DCT_v_scaled_size;
-  /* The downsampled dimensions are the component's actual, unpadded number
-   * of samples at the main buffer (preprocessing/compression interface);
-   * DCT scaling is included, so
-   * downsampled_width =
-   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
-   * and similarly for height.
-   */
-  JDIMENSION downsampled_width;	 /* actual width in samples */
-  JDIMENSION downsampled_height; /* actual height in samples */
-  /* For decompression, in cases where some of the components will be
-   * ignored (eg grayscale output from YCbCr image), we can skip most
-   * computations for the unused components.
-   * For compression, some of the components will need further quantization
-   * scale by factor of 2 after DCT (eg BG_YCC output from normal RGB input).
-   * The field is first set TRUE for decompression, FALSE for compression
-   * in initial_setup, and then adapted in color conversion setup.
-   */
-  boolean component_needed;
-
-  /* These values are computed before starting a scan of the component. */
-  /* The decompressor output side may not use these variables. */
-  int MCU_width;		/* number of blocks per MCU, horizontally */
-  int MCU_height;		/* number of blocks per MCU, vertically */
-  int MCU_blocks;		/* MCU_width * MCU_height */
-  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
-  int last_col_width;		/* # of non-dummy blocks across in last MCU */
-  int last_row_height;		/* # of non-dummy blocks down in last MCU */
-
-  /* Saved quantization table for component; NULL if none yet saved.
-   * See jdinput.c comments about the need for this information.
-   * This field is currently used only for decompression.
-   */
-  JQUANT_TBL * quant_table;
-
-  /* Private per-component storage for DCT or IDCT subsystem. */
-  void * dct_table;
-} jpeg_component_info;
-
-
-/* The script for encoding a multiple-scan file is an array of these: */
-
-typedef struct {
-  int comps_in_scan;		/* number of components encoded in this scan */
-  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;			/* progressive JPEG spectral selection parms */
-  int Ah, Al;			/* progressive JPEG successive approx. parms */
-} jpeg_scan_info;
-
-/* The decompressor can save APPn and COM markers in a list of these: */
-
-typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
-
-struct jpeg_marker_struct {
-  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
-  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
-  unsigned int original_length;	/* # bytes of data in the file */
-  unsigned int data_length;	/* # bytes of data saved at data[] */
-  JOCTET FAR * data;		/* the data contained in the marker */
-  /* the marker length word is not counted in data_length or original_length */
-};
-
-/* Known color spaces. */
-
-typedef enum {
-	JCS_UNKNOWN,		/* error/unspecified */
-	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
-	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK,		/* Y/Cb/Cr/K */
-	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
-	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
-} J_COLOR_SPACE;
-
-/* Supported color transforms. */
-
-typedef enum {
-	JCT_NONE           = 0,
-	JCT_SUBTRACT_GREEN = 1
-} J_COLOR_TRANSFORM;
-
-/* DCT/IDCT algorithm options. */
-
-typedef enum {
-	JDCT_ISLOW,		/* slow but accurate integer algorithm */
-	JDCT_IFAST,		/* faster, less accurate integer method */
-	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
-} J_DCT_METHOD;
-
-#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
-#define JDCT_DEFAULT  JDCT_ISLOW
-#endif
-#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
-#define JDCT_FASTEST  JDCT_IFAST
-#endif
-
-/* Dithering options for decompression. */
-
-typedef enum {
-	JDITHER_NONE,		/* no dithering */
-	JDITHER_ORDERED,	/* simple ordered dither */
-	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
-} J_DITHER_MODE;
-
-
-/* Common fields between JPEG compression and decompression master structs. */
-
-#define jpeg_common_fields \
-  struct jpeg_error_mgr * err;	/* Error handler module */\
-  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
-  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
-  void * client_data;		/* Available for use by application */\
-  boolean is_decompressor;	/* So common code can tell which is which */\
-  int global_state		/* For checking call sequence validity */
-
-/* Routines that are to be used by both halves of the library are declared
- * to receive a pointer to this structure.  There are no actual instances of
- * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
- */
-struct jpeg_common_struct {
-  jpeg_common_fields;		/* Fields common to both master struct types */
-  /* Additional fields follow in an actual jpeg_compress_struct or
-   * jpeg_decompress_struct.  All three structs must agree on these
-   * initial fields!  (This would be a lot cleaner in C++.)
-   */
-};
-
-typedef struct jpeg_common_struct * j_common_ptr;
-typedef struct jpeg_compress_struct * j_compress_ptr;
-typedef struct jpeg_decompress_struct * j_decompress_ptr;
-
-
-/* Master record for a compression instance */
-
-struct jpeg_compress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
-
-  /* Destination for compressed data */
-  struct jpeg_destination_mgr * dest;
-
-  /* Description of source image --- these fields must be filled in by
-   * outer application before starting compression.  in_color_space must
-   * be correct before you can even call jpeg_set_defaults().
-   */
-
-  JDIMENSION image_width;	/* input image width */
-  JDIMENSION image_height;	/* input image height */
-  int input_components;		/* # of color components in input image */
-  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
-
-  double input_gamma;		/* image gamma of input image */
-
-  /* Compression parameters --- these fields must be set before calling
-   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
-   * initialize everything to reasonable defaults, then changing anything
-   * the application specifically wants to change.  That way you won't get
-   * burnt when new parameters are added.  Also note that there are several
-   * helper routines to simplify changing parameters.
-   */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  JDIMENSION jpeg_width;	/* scaled JPEG image width */
-  JDIMENSION jpeg_height;	/* scaled JPEG image height */
-  /* Dimensions of actual JPEG image that will be written to file,
-   * derived from input dimensions by scaling factors above.
-   * These fields are computed by jpeg_start_compress().
-   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
-   * in advance of calling jpeg_start_compress().
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  int q_scale_factor[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined,
-   * and corresponding scale factors (percentage, initialized 100).
-   */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  int num_scans;		/* # of entries in scan_info array */
-  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
-  /* The default value of scan_info is NULL, which causes a single-scan
-   * sequential JPEG file to be emitted.  To create a multi-scan file,
-   * set num_scans and scan_info to point to an array of scan definitions.
-   */
-
-  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
-  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
-  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
-
-  /* The restart interval can be specified in absolute MCUs by setting
-   * restart_interval, or in MCU rows by setting restart_in_rows
-   * (in which case the correct restart_interval will be figured
-   * for each scan).
-   */
-  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
-  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
-
-  /* Parameters controlling emission of special markers. */
-
-  boolean write_JFIF_header;	/* should a JFIF marker be written? */
-  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
-  UINT8 JFIF_minor_version;
-  /* These three values are not used by the JPEG code, merely copied */
-  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
-  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
-  /* ratio is defined by X_density/Y_density even when density_unit=0. */
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
-
-  J_COLOR_TRANSFORM color_transform;
-  /* Color transform identifier, writes LSE marker if nonzero */
-
-  /* State variable: index of next scanline to be written to
-   * jpeg_write_scanlines().  Application may use this to control its
-   * processing loop, e.g., "while (next_scanline < image_height)".
-   */
-
-  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
-
-  /* Remaining fields are known throughout compressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during compression startup
-   */
-  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
-  /* The coefficient controller receives data in units of MCU rows as defined
-   * for fully interleaved scans (whether the JPEG file is interleaved or not).
-   * There are v_samp_factor * DCT_v_scaled_size sample rows of each component
-   * in an "iMCU" (interleaved MCU) row.
-   */
-
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order;	/* natural-order position array */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
-
-  /*
-   * Links to compression subobjects (methods and private variables of modules)
-   */
-  struct jpeg_comp_master * master;
-  struct jpeg_c_main_controller * main;
-  struct jpeg_c_prep_controller * prep;
-  struct jpeg_c_coef_controller * coef;
-  struct jpeg_marker_writer * marker;
-  struct jpeg_color_converter * cconvert;
-  struct jpeg_downsampler * downsample;
-  struct jpeg_forward_dct * fdct;
-  struct jpeg_entropy_encoder * entropy;
-  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
-  int script_space_size;
-};
-
-
-/* Master record for a decompression instance */
-
-struct jpeg_decompress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
-
-  /* Source of compressed data */
-  struct jpeg_source_mgr * src;
-
-  /* Basic description of image --- filled in by jpeg_read_header(). */
-  /* Application may inspect these values to decide how to process image. */
-
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
-  JDIMENSION image_height;	/* nominal image height */
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  /* Decompression processing parameters --- these fields must be set before
-   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
-   * them to default values.
-   */
-
-  J_COLOR_SPACE out_color_space; /* colorspace for output */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  double output_gamma;		/* image gamma wanted in output */
-
-  boolean buffered_image;	/* TRUE=multiple output passes */
-  boolean raw_data_out;		/* TRUE=downsampled data wanted */
-
-  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
-  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
-  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
-
-  boolean quantize_colors;	/* TRUE=colormapped output wanted */
-  /* the following are ignored if not quantize_colors: */
-  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
-  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
-  int desired_number_of_colors;	/* max # colors to use in created colormap */
-  /* these are significant only in buffered-image mode: */
-  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
-  boolean enable_external_quant;/* enable future use of external colormap */
-  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
-
-  /* Description of actual output image that will be returned to application.
-   * These fields are computed by jpeg_start_decompress().
-   * You can also use jpeg_calc_output_dimensions() to determine these values
-   * in advance of calling jpeg_start_decompress().
-   */
-
-  JDIMENSION output_width;	/* scaled image width */
-  JDIMENSION output_height;	/* scaled image height */
-  int out_color_components;	/* # of color components in out_color_space */
-  int output_components;	/* # of color components returned */
-  /* output_components is 1 (a colormap index) when quantizing colors;
-   * otherwise it equals out_color_components.
-   */
-  int rec_outbuf_height;	/* min recommended height of scanline buffer */
-  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
-   * high, space and time will be wasted due to unnecessary data copying.
-   * Usually rec_outbuf_height will be 1 or 2, at most 4.
-   */
-
-  /* When quantizing colors, the output colormap is described by these fields.
-   * The application can supply a colormap by setting colormap non-NULL before
-   * calling jpeg_start_decompress; otherwise a colormap is created during
-   * jpeg_start_decompress or jpeg_start_output.
-   * The map has out_color_components rows and actual_number_of_colors columns.
-   */
-  int actual_number_of_colors;	/* number of entries in use */
-  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
-
-  /* State variables: these variables indicate the progress of decompression.
-   * The application may examine these but must not modify them.
-   */
-
-  /* Row index of next scanline to be read from jpeg_read_scanlines().
-   * Application may use this to control its processing loop, e.g.,
-   * "while (output_scanline < output_height)".
-   */
-  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
-
-  /* Current input scan number and number of iMCU rows completed in scan.
-   * These indicate the progress of the decompressor input side.
-   */
-  int input_scan_number;	/* Number of SOS markers seen so far */
-  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
-
-  /* The "output scan number" is the notional scan being displayed by the
-   * output side.  The decompressor will not allow output scan/row number
-   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
-   */
-  int output_scan_number;	/* Nominal scan number being displayed */
-  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
-
-  /* Current progression status.  coef_bits[c][i] indicates the precision
-   * with which component c's DCT coefficient i (in zigzag order) is known.
-   * It is -1 when no data has yet been received, otherwise it is the point
-   * transform (shift) value for the most recent scan of the coefficient
-   * (thus, 0 at completion of the progression).
-   * This pointer is NULL when reading a non-progressive file.
-   */
-  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
-
-  /* Internal JPEG parameters --- the application usually need not look at
-   * these fields.  Note that the decompressor output side may not use
-   * any parameters that can change between scans.
-   */
-
-  /* Quantization and Huffman tables are carried forward across input
-   * datastreams when processing abbreviated JPEG datastreams.
-   */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  /* These parameters are never carried across datastreams, since they
-   * are given in SOF/SOS markers or defined to be reset by SOI.
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
-  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
-
-  /* These fields record data obtained from optional markers recognized by
-   * the JPEG library.
-   */
-  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
-  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
-  UINT8 JFIF_major_version;	/* JFIF version number */
-  UINT8 JFIF_minor_version;
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
-  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
-
-  J_COLOR_TRANSFORM color_transform;
-  /* Color transform identifier derived from LSE marker, otherwise zero */
-
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-
-  /* Aside from the specific data retained from APPn markers known to the
-   * library, the uninterpreted contents of any or all APPn and COM markers
-   * can be saved in a list for examination by the application.
-   */
-  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
-
-  /* Remaining fields are known throughout decompressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during decompression startup
-   */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
-  /* The coefficient controller's input and output progress is measured in
-   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
-   * in fully interleaved JPEG scans, but are used whether the scan is
-   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
-   * rows of each component.  Therefore, the IDCT output contains
-   * v_samp_factor * DCT_v_scaled_size sample rows of a component per iMCU row.
-   */
-
-  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
-
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   * Note that the decompressor output side must not use these fields.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  /* These fields are derived from Se of first SOS marker.
-   */
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order; /* natural-order position array for entropy decode */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
-
-  /* This field is shared between entropy decoder and marker parser.
-   * It is either zero or the code of a JPEG marker that has been
-   * read from the data source, but has not yet been processed.
-   */
-  int unread_marker;
-
-  /*
-   * Links to decompression subobjects (methods, private variables of modules)
-   */
-  struct jpeg_decomp_master * master;
-  struct jpeg_d_main_controller * main;
-  struct jpeg_d_coef_controller * coef;
-  struct jpeg_d_post_controller * post;
-  struct jpeg_input_controller * inputctl;
-  struct jpeg_marker_reader * marker;
-  struct jpeg_entropy_decoder * entropy;
-  struct jpeg_inverse_dct * idct;
-  struct jpeg_upsampler * upsample;
-  struct jpeg_color_deconverter * cconvert;
-  struct jpeg_color_quantizer * cquantize;
-};
-
-
-/* "Object" declarations for JPEG modules that may be supplied or called
- * directly by the surrounding application.
- * As with all objects in the JPEG library, these structs only define the
- * publicly visible methods and state variables of a module.  Additional
- * private fields may exist after the public ones.
- */
-
-
-/* Error handler object */
-
-struct jpeg_error_mgr {
-  /* Error exit handler: does not return to caller */
-  JMETHOD(noreturn_t, error_exit, (j_common_ptr cinfo));
-  /* Conditionally emit a trace or warning message */
-  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
-  /* Routine that actually outputs a trace or error message */
-  JMETHOD(void, output_message, (j_common_ptr cinfo));
-  /* Format a message string for the most recent JPEG error or message */
-  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
-#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
-  /* Reset error state variables at start of a new image */
-  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
-
-  /* The message ID code and any parameters are saved here.
-   * A message can have one string parameter or up to 8 int parameters.
-   */
-  int msg_code;
-#define JMSG_STR_PARM_MAX  80
-  union {
-    int i[8];
-    char s[JMSG_STR_PARM_MAX];
-  } msg_parm;
-
-  /* Standard state variables for error facility */
-
-  int trace_level;		/* max msg_level that will be displayed */
-
-  /* For recoverable corrupt-data errors, we emit a warning message,
-   * but keep going unless emit_message chooses to abort.  emit_message
-   * should count warnings in num_warnings.  The surrounding application
-   * can check for bad data by seeing if num_warnings is nonzero at the
-   * end of processing.
-   */
-  long num_warnings;		/* number of corrupt-data warnings */
-
-  /* These fields point to the table(s) of error message strings.
-   * An application can change the table pointer to switch to a different
-   * message list (typically, to change the language in which errors are
-   * reported).  Some applications may wish to add additional error codes
-   * that will be handled by the JPEG library error mechanism; the second
-   * table pointer is used for this purpose.
-   *
-   * First table includes all errors generated by JPEG library itself.
-   * Error code 0 is reserved for a "no such error string" message.
-   */
-  const char * const * jpeg_message_table; /* Library errors */
-  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
-  /* Second table can be added by application (see cjpeg/djpeg for example).
-   * It contains strings numbered first_addon_message..last_addon_message.
-   */
-  const char * const * addon_message_table; /* Non-library errors */
-  int first_addon_message;	/* code for first string in addon table */
-  int last_addon_message;	/* code for last string in addon table */
-};
-
-
-/* Progress monitor object */
-
-struct jpeg_progress_mgr {
-  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
-
-  long pass_counter;		/* work units completed in this pass */
-  long pass_limit;		/* total number of work units in this pass */
-  int completed_passes;		/* passes completed so far */
-  int total_passes;		/* total number of passes expected */
-};
-
-
-/* Data destination object for compression */
-
-struct jpeg_destination_mgr {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-
-  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
-  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
-  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
-};
-
-
-/* Data source object for decompression */
-
-struct jpeg_source_mgr {
-  const JOCTET * next_input_byte; /* => next byte to read from buffer */
-  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
-
-  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
-  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
-  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
-  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
-};
-
-
-/* Memory manager object.
- * Allocates "small" objects (a few K total), "large" objects (tens of K),
- * and "really big" objects (virtual arrays with backing store if needed).
- * The memory manager does not allow individual objects to be freed; rather,
- * each created object is assigned to a pool, and whole pools can be freed
- * at once.  This is faster and more convenient than remembering exactly what
- * to free, especially where malloc()/free() are not too speedy.
- * NB: alloc routines never return NULL.  They exit to error_exit if not
- * successful.
- */
-
-#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
-#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
-#define JPOOL_NUMPOOLS	2
-
-typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
-typedef struct jvirt_barray_control * jvirt_barray_ptr;
-
-
-struct jpeg_memory_mgr {
-  /* Method pointers */
-  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
-				size_t sizeofobject));
-  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
-				     size_t sizeofobject));
-  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
-				     JDIMENSION samplesperrow,
-				     JDIMENSION numrows));
-  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
-				      JDIMENSION blocksperrow,
-				      JDIMENSION numrows));
-  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION samplesperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION blocksperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
-  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
-					   jvirt_sarray_ptr ptr,
-					   JDIMENSION start_row,
-					   JDIMENSION num_rows,
-					   boolean writable));
-  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
-					    jvirt_barray_ptr ptr,
-					    JDIMENSION start_row,
-					    JDIMENSION num_rows,
-					    boolean writable));
-  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
-  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
-
-  /* Limit on memory allocation for this JPEG object.  (Note that this is
-   * merely advisory, not a guaranteed maximum; it only affects the space
-   * used for virtual-array buffers.)  May be changed by outer application
-   * after creating the JPEG object.
-   */
-  long max_memory_to_use;
-
-  /* Maximum allocation request accepted by alloc_large. */
-  long max_alloc_chunk;
-};
-
-
-/* Routine signature for application-supplied marker processing methods.
- * Need not pass marker code since it is stored in cinfo->unread_marker.
- */
-typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
-
-
-/* Declarations for routines called by application.
- * The JPP macro hides prototype parameters from compilers that can't cope.
- * Note JPP requires double parentheses.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JPP(arglist)	arglist
-#else
-#define JPP(arglist)	()
-#endif
-
-
-/* Short forms of external names for systems with brain-damaged linkers.
- * We shorten external names to be unique in the first six letters, which
- * is good enough for all known systems.
- * (If your compiler itself needs names to be unique in less than 15 
- * characters, you are out of luck.  Get a better compiler.)
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_error		jStdError
-#define jpeg_CreateCompress	jCreaCompress
-#define jpeg_CreateDecompress	jCreaDecompress
-#define jpeg_destroy_compress	jDestCompress
-#define jpeg_destroy_decompress	jDestDecompress
-#define jpeg_stdio_dest		jStdDest
-#define jpeg_stdio_src		jStdSrc
-#define jpeg_mem_dest		jMemDest
-#define jpeg_mem_src		jMemSrc
-#define jpeg_set_defaults	jSetDefaults
-#define jpeg_set_colorspace	jSetColorspace
-#define jpeg_default_colorspace	jDefColorspace
-#define jpeg_set_quality	jSetQuality
-#define jpeg_set_linear_quality	jSetLQuality
-#define jpeg_default_qtables	jDefQTables
-#define jpeg_add_quant_table	jAddQuantTable
-#define jpeg_quality_scaling	jQualityScaling
-#define jpeg_simple_progression	jSimProgress
-#define jpeg_suppress_tables	jSuppressTables
-#define jpeg_alloc_quant_table	jAlcQTable
-#define jpeg_alloc_huff_table	jAlcHTable
-#define jpeg_std_huff_table	jStdHTable
-#define jpeg_start_compress	jStrtCompress
-#define jpeg_write_scanlines	jWrtScanlines
-#define jpeg_finish_compress	jFinCompress
-#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
-#define jpeg_write_raw_data	jWrtRawData
-#define jpeg_write_marker	jWrtMarker
-#define jpeg_write_m_header	jWrtMHeader
-#define jpeg_write_m_byte	jWrtMByte
-#define jpeg_write_tables	jWrtTables
-#define jpeg_read_header	jReadHeader
-#define jpeg_start_decompress	jStrtDecompress
-#define jpeg_read_scanlines	jReadScanlines
-#define jpeg_finish_decompress	jFinDecompress
-#define jpeg_read_raw_data	jReadRawData
-#define jpeg_has_multiple_scans	jHasMultScn
-#define jpeg_start_output	jStrtOutput
-#define jpeg_finish_output	jFinOutput
-#define jpeg_input_complete	jInComplete
-#define jpeg_new_colormap	jNewCMap
-#define jpeg_consume_input	jConsumeInput
-#define jpeg_core_output_dimensions	jCoreDimensions
-#define jpeg_calc_output_dimensions	jCalcDimensions
-#define jpeg_save_markers	jSaveMarkers
-#define jpeg_set_marker_processor	jSetMarker
-#define jpeg_read_coefficients	jReadCoefs
-#define jpeg_write_coefficients	jWrtCoefs
-#define jpeg_copy_critical_parameters	jCopyCrit
-#define jpeg_abort_compress	jAbrtCompress
-#define jpeg_abort_decompress	jAbrtDecompress
-#define jpeg_abort		jAbort
-#define jpeg_destroy		jDestroy
-#define jpeg_resync_to_restart	jResyncRestart
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* Default error-management setup */
-EXTERN(struct jpeg_error_mgr *) jpeg_std_error
-	JPP((struct jpeg_error_mgr * err));
-
-/* Initialization of JPEG compression objects.
- * jpeg_create_compress() and jpeg_create_decompress() are the exported
- * names that applications should call.  These expand to calls on
- * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
- * passed for version mismatch checking.
- * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
- */
-#define jpeg_create_compress(cinfo) \
-    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
-			(size_t) sizeof(struct jpeg_compress_struct))
-#define jpeg_create_decompress(cinfo) \
-    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
-			  (size_t) sizeof(struct jpeg_decompress_struct))
-EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
-				      int version, size_t structsize));
-EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
-					int version, size_t structsize));
-/* Destruction of JPEG compression objects */
-EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
-
-/* Standard data source and destination managers: stdio streams. */
-/* Caller is responsible for opening the file before and closing after. */
-EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
-EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
-
-/* Data source and destination managers: memory buffers. */
-EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
-			       unsigned char ** outbuffer,
-			       size_t * outsize));
-EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
-			      const unsigned char * inbuffer,
-			      size_t insize));
-
-/* Default parameter setup for compression */
-EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
-/* Compression parameter setup aids */
-EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
-				      J_COLOR_SPACE colorspace));
-EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
-				   boolean force_baseline));
-EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
-					  int scale_factor,
-					  boolean force_baseline));
-EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
-				       boolean force_baseline));
-EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
-				       const unsigned int *basic_table,
-				       int scale_factor,
-				       boolean force_baseline));
-EXTERN(int) jpeg_quality_scaling JPP((int quality));
-EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
-				       boolean suppress));
-EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_std_huff_table JPP((j_common_ptr cinfo,
-					     boolean isDC, int tblno));
-
-/* Main entry points for compression */
-EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
-				      boolean write_all_tables));
-EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
-					     JSAMPARRAY scanlines,
-					     JDIMENSION num_lines));
-EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
-
-/* Precalculate JPEG dimensions for current compression parameters. */
-EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
-
-/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
-					    JSAMPIMAGE data,
-					    JDIMENSION num_lines));
-
-/* Write a special marker.  See libjpeg.txt concerning safe usage. */
-EXTERN(void) jpeg_write_marker
-	JPP((j_compress_ptr cinfo, int marker,
-	     const JOCTET * dataptr, unsigned int datalen));
-/* Same, but piecemeal. */
-EXTERN(void) jpeg_write_m_header
-	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
-EXTERN(void) jpeg_write_m_byte
-	JPP((j_compress_ptr cinfo, int val));
-
-/* Alternate compression function: just write an abbreviated table file */
-EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
-
-/* Decompression startup: read start of JPEG datastream to see what's there */
-EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
-				  boolean require_image));
-/* Return value is one of: */
-#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
-#define JPEG_HEADER_OK		1 /* Found valid image datastream */
-#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
-/* If you pass require_image = TRUE (normal case), you need not check for
- * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
- * JPEG_SUSPENDED is only possible if you use a data source module that can
- * give a suspension return (the stdio source module doesn't).
- */
-
-/* Main entry points for decompression */
-EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
-					    JSAMPARRAY scanlines,
-					    JDIMENSION max_lines));
-EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
-
-/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
-					   JSAMPIMAGE data,
-					   JDIMENSION max_lines));
-
-/* Additional entry points for buffered-image mode. */
-EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
-				       int scan_number));
-EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
-EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
-/* Return value is one of: */
-/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
-#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
-#define JPEG_REACHED_EOI	2 /* Reached end of image */
-#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
-#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
-
-/* Precalculate output dimensions for current decompression parameters. */
-EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
-
-/* Control saving of COM and APPn markers into marker_list. */
-EXTERN(void) jpeg_save_markers
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     unsigned int length_limit));
-
-/* Install a special processing method for COM or APPn markers. */
-EXTERN(void) jpeg_set_marker_processor
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     jpeg_marker_parser_method routine));
-
-/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
-EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
-					  jvirt_barray_ptr * coef_arrays));
-EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
-						j_compress_ptr dstinfo));
-
-/* If you choose to abort compression or decompression before completing
- * jpeg_finish_(de)compress, then you need to clean up to release memory,
- * temporary files, etc.  You can just call jpeg_destroy_(de)compress
- * if you're done with the JPEG object, but if you want to clean it up and
- * reuse it, call this:
- */
-EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
-
-/* Generic versions of jpeg_abort and jpeg_destroy that work on either
- * flavor of JPEG object.  These may be more convenient in some places.
- */
-EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
-
-/* Default restart-marker-resync procedure for use by data source modules */
-EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
-					    int desired));
-
-
-/* These marker codes are exported since applications and data source modules
- * are likely to want to use them.
- */
-
-#define JPEG_RST0	0xD0	/* RST0 marker code */
-#define JPEG_EOI	0xD9	/* EOI marker code */
-#define JPEG_APP0	0xE0	/* APP0 marker code */
-#define JPEG_COM	0xFE	/* COM marker code */
-
-
-/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
- * for structure definitions that are never filled in, keep it quiet by
- * supplying dummy definitions for the various substructures.
- */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-struct jpeg_comp_master { long dummy; };
-struct jpeg_c_main_controller { long dummy; };
-struct jpeg_c_prep_controller { long dummy; };
-struct jpeg_c_coef_controller { long dummy; };
-struct jpeg_marker_writer { long dummy; };
-struct jpeg_color_converter { long dummy; };
-struct jpeg_downsampler { long dummy; };
-struct jpeg_forward_dct { long dummy; };
-struct jpeg_entropy_encoder { long dummy; };
-struct jpeg_decomp_master { long dummy; };
-struct jpeg_d_main_controller { long dummy; };
-struct jpeg_d_coef_controller { long dummy; };
-struct jpeg_d_post_controller { long dummy; };
-struct jpeg_input_controller { long dummy; };
-struct jpeg_marker_reader { long dummy; };
-struct jpeg_entropy_decoder { long dummy; };
-struct jpeg_inverse_dct { long dummy; };
-struct jpeg_upsampler { long dummy; };
-struct jpeg_color_deconverter { long dummy; };
-struct jpeg_color_quantizer { long dummy; };
-#endif /* JPEG_INTERNALS */
-#endif /* INCOMPLETE_TYPES_BROKEN */
-
-
-/*
- * The JPEG library modules define JPEG_INTERNALS before including this file.
- * The internal structure declarations are read only when that is true.
- * Applications using the library should not include jpegint.h, but may wish
- * to include jerror.h.
- */
-
-#ifdef JPEG_INTERNALS
-#include "jpegint.h"		/* fetch private declarations */
-#include "jerror.h"		/* fetch error codes too */
-#endif
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-}
-#endif
-#endif
-
-#endif /* JPEGLIB_H */
diff --git a/dep/libjpeg/libjpeg.vcxproj b/dep/libjpeg/libjpeg.vcxproj
deleted file mode 100644
index 655271d41..000000000
--- a/dep/libjpeg/libjpeg.vcxproj
+++ /dev/null
@@ -1,76 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <Import Project="..\msvc\vsprops\Configurations.props" />
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{EC3B6685-0B6E-4767-84AB-39B75EEAD2E2}</ProjectGuid>
-  </PropertyGroup>
-  <ItemGroup>
-    <ClInclude Include="include\jconfig.h" />
-    <ClInclude Include="include\jerror.h" />
-    <ClInclude Include="include\jmorecfg.h" />
-    <ClInclude Include="include\jpegint.h" />
-    <ClInclude Include="include\jpeglib.h" />
-    <ClInclude Include="src\jdct.h" />
-    <ClInclude Include="src\jinclude.h" />
-    <ClInclude Include="src\jmemsys.h" />
-    <ClInclude Include="src\jversion.h" />
-    <ClInclude Include="src\transupp.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\jaricom.c" />
-    <ClCompile Include="src\jcapimin.c" />
-    <ClCompile Include="src\jcapistd.c" />
-    <ClCompile Include="src\jcarith.c" />
-    <ClCompile Include="src\jccoefct.c" />
-    <ClCompile Include="src\jccolor.c" />
-    <ClCompile Include="src\jcdctmgr.c" />
-    <ClCompile Include="src\jchuff.c" />
-    <ClCompile Include="src\jcinit.c" />
-    <ClCompile Include="src\jcmainct.c" />
-    <ClCompile Include="src\jcmarker.c" />
-    <ClCompile Include="src\jcmaster.c" />
-    <ClCompile Include="src\jcomapi.c" />
-    <ClCompile Include="src\jcparam.c" />
-    <ClCompile Include="src\jcprepct.c" />
-    <ClCompile Include="src\jcsample.c" />
-    <ClCompile Include="src\jctrans.c" />
-    <ClCompile Include="src\jdapimin.c" />
-    <ClCompile Include="src\jdapistd.c" />
-    <ClCompile Include="src\jdarith.c" />
-    <ClCompile Include="src\jdatadst.c" />
-    <ClCompile Include="src\jdatasrc.c" />
-    <ClCompile Include="src\jdcoefct.c" />
-    <ClCompile Include="src\jdcolor.c" />
-    <ClCompile Include="src\jddctmgr.c" />
-    <ClCompile Include="src\jdhuff.c" />
-    <ClCompile Include="src\jdinput.c" />
-    <ClCompile Include="src\jdmainct.c" />
-    <ClCompile Include="src\jdmarker.c" />
-    <ClCompile Include="src\jdmaster.c" />
-    <ClCompile Include="src\jdmerge.c" />
-    <ClCompile Include="src\jdpostct.c" />
-    <ClCompile Include="src\jdsample.c" />
-    <ClCompile Include="src\jdtrans.c" />
-    <ClCompile Include="src\jerror.c" />
-    <ClCompile Include="src\jfdctflt.c" />
-    <ClCompile Include="src\jfdctfst.c" />
-    <ClCompile Include="src\jfdctint.c" />
-    <ClCompile Include="src\jidctflt.c" />
-    <ClCompile Include="src\jidctfst.c" />
-    <ClCompile Include="src\jidctint.c" />
-    <ClCompile Include="src\jmemmgr.c" />
-    <ClCompile Include="src\jmemnobs.c" />
-    <ClCompile Include="src\jquant1.c" />
-    <ClCompile Include="src\jquant2.c" />
-    <ClCompile Include="src\jutils.c" />
-    <ClCompile Include="src\transupp.c" />
-  </ItemGroup>
-  <Import Project="..\msvc\vsprops\StaticLibrary.props" />
-  <ItemDefinitionGroup>
-    <ClCompile>
-      <WarningLevel>TurnOffAllWarnings</WarningLevel>
-      <AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-  </ItemDefinitionGroup>
-  <Import Project="..\msvc\vsprops\Targets.props" />
-</Project>
\ No newline at end of file
diff --git a/dep/libjpeg/libjpeg.vcxproj.filters b/dep/libjpeg/libjpeg.vcxproj.filters
deleted file mode 100644
index 9b5b8ef17..000000000
--- a/dep/libjpeg/libjpeg.vcxproj.filters
+++ /dev/null
@@ -1,52 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClInclude Include="include\pngconf.h" />
-    <ClInclude Include="include\png.h" />
-    <ClInclude Include="src\pngdebug.h" />
-    <ClInclude Include="src\pnginfo.h" />
-    <ClInclude Include="src\pngpriv.h" />
-    <ClInclude Include="src\pngstruct.h" />
-    <ClInclude Include="include\pnglibconf.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\png.c" />
-    <ClCompile Include="src\pngerror.c" />
-    <ClCompile Include="src\pngget.c" />
-    <ClCompile Include="src\pngmem.c" />
-    <ClCompile Include="src\pngpread.c" />
-    <ClCompile Include="src\pngread.c" />
-    <ClCompile Include="src\pngrio.c" />
-    <ClCompile Include="src\pngrtran.c" />
-    <ClCompile Include="src\pngrutil.c" />
-    <ClCompile Include="src\pngset.c" />
-    <ClCompile Include="src\pngtrans.c" />
-    <ClCompile Include="src\pngwio.c" />
-    <ClCompile Include="src\pngwrite.c" />
-    <ClCompile Include="src\pngwtran.c" />
-    <ClCompile Include="src\pngwutil.c" />
-    <ClCompile Include="src\intel\intel_init.c">
-      <Filter>intel</Filter>
-    </ClCompile>
-    <ClCompile Include="src\intel\filter_sse2_intrinsics.c">
-      <Filter>intel</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\filter_neon_intrinsics.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\palette_neon_intrinsics.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\arm_init.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <Filter Include="arm">
-      <UniqueIdentifier>{9f24e95e-025d-4ed8-8c41-2fb1c7a36026}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="intel">
-      <UniqueIdentifier>{8316b9c1-8c00-4bc8-ace7-c9b864890f2d}</UniqueIdentifier>
-    </Filter>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/dep/libjpeg/src/jaricom.c b/dep/libjpeg/src/jaricom.c
deleted file mode 100644
index 690068861..000000000
--- a/dep/libjpeg/src/jaricom.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * jaricom.c
- *
- * Developed 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains probability estimation tables for common use in
- * arithmetic entropy encoding and decoding routines.
- *
- * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
- * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
- * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-/* The following #define specifies the packing of the four components
- * into the compact INT32 representation.
- * Note that this formula must match the actual arithmetic encoder
- * and decoder implementation.  The implementation has to be changed
- * if this formula is changed.
- * The current organization is leaned on Markus Kuhn's JBIG
- * implementation (jbig_tab.c).
- */
-
-#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
-
-const INT32 jpeg_aritab[113+1] = {
-/*
- * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
- */
-  V(   0, 0x5a1d,   1,   1, 1 ),
-  V(   1, 0x2586,  14,   2, 0 ),
-  V(   2, 0x1114,  16,   3, 0 ),
-  V(   3, 0x080b,  18,   4, 0 ),
-  V(   4, 0x03d8,  20,   5, 0 ),
-  V(   5, 0x01da,  23,   6, 0 ),
-  V(   6, 0x00e5,  25,   7, 0 ),
-  V(   7, 0x006f,  28,   8, 0 ),
-  V(   8, 0x0036,  30,   9, 0 ),
-  V(   9, 0x001a,  33,  10, 0 ),
-  V(  10, 0x000d,  35,  11, 0 ),
-  V(  11, 0x0006,   9,  12, 0 ),
-  V(  12, 0x0003,  10,  13, 0 ),
-  V(  13, 0x0001,  12,  13, 0 ),
-  V(  14, 0x5a7f,  15,  15, 1 ),
-  V(  15, 0x3f25,  36,  16, 0 ),
-  V(  16, 0x2cf2,  38,  17, 0 ),
-  V(  17, 0x207c,  39,  18, 0 ),
-  V(  18, 0x17b9,  40,  19, 0 ),
-  V(  19, 0x1182,  42,  20, 0 ),
-  V(  20, 0x0cef,  43,  21, 0 ),
-  V(  21, 0x09a1,  45,  22, 0 ),
-  V(  22, 0x072f,  46,  23, 0 ),
-  V(  23, 0x055c,  48,  24, 0 ),
-  V(  24, 0x0406,  49,  25, 0 ),
-  V(  25, 0x0303,  51,  26, 0 ),
-  V(  26, 0x0240,  52,  27, 0 ),
-  V(  27, 0x01b1,  54,  28, 0 ),
-  V(  28, 0x0144,  56,  29, 0 ),
-  V(  29, 0x00f5,  57,  30, 0 ),
-  V(  30, 0x00b7,  59,  31, 0 ),
-  V(  31, 0x008a,  60,  32, 0 ),
-  V(  32, 0x0068,  62,  33, 0 ),
-  V(  33, 0x004e,  63,  34, 0 ),
-  V(  34, 0x003b,  32,  35, 0 ),
-  V(  35, 0x002c,  33,   9, 0 ),
-  V(  36, 0x5ae1,  37,  37, 1 ),
-  V(  37, 0x484c,  64,  38, 0 ),
-  V(  38, 0x3a0d,  65,  39, 0 ),
-  V(  39, 0x2ef1,  67,  40, 0 ),
-  V(  40, 0x261f,  68,  41, 0 ),
-  V(  41, 0x1f33,  69,  42, 0 ),
-  V(  42, 0x19a8,  70,  43, 0 ),
-  V(  43, 0x1518,  72,  44, 0 ),
-  V(  44, 0x1177,  73,  45, 0 ),
-  V(  45, 0x0e74,  74,  46, 0 ),
-  V(  46, 0x0bfb,  75,  47, 0 ),
-  V(  47, 0x09f8,  77,  48, 0 ),
-  V(  48, 0x0861,  78,  49, 0 ),
-  V(  49, 0x0706,  79,  50, 0 ),
-  V(  50, 0x05cd,  48,  51, 0 ),
-  V(  51, 0x04de,  50,  52, 0 ),
-  V(  52, 0x040f,  50,  53, 0 ),
-  V(  53, 0x0363,  51,  54, 0 ),
-  V(  54, 0x02d4,  52,  55, 0 ),
-  V(  55, 0x025c,  53,  56, 0 ),
-  V(  56, 0x01f8,  54,  57, 0 ),
-  V(  57, 0x01a4,  55,  58, 0 ),
-  V(  58, 0x0160,  56,  59, 0 ),
-  V(  59, 0x0125,  57,  60, 0 ),
-  V(  60, 0x00f6,  58,  61, 0 ),
-  V(  61, 0x00cb,  59,  62, 0 ),
-  V(  62, 0x00ab,  61,  63, 0 ),
-  V(  63, 0x008f,  61,  32, 0 ),
-  V(  64, 0x5b12,  65,  65, 1 ),
-  V(  65, 0x4d04,  80,  66, 0 ),
-  V(  66, 0x412c,  81,  67, 0 ),
-  V(  67, 0x37d8,  82,  68, 0 ),
-  V(  68, 0x2fe8,  83,  69, 0 ),
-  V(  69, 0x293c,  84,  70, 0 ),
-  V(  70, 0x2379,  86,  71, 0 ),
-  V(  71, 0x1edf,  87,  72, 0 ),
-  V(  72, 0x1aa9,  87,  73, 0 ),
-  V(  73, 0x174e,  72,  74, 0 ),
-  V(  74, 0x1424,  72,  75, 0 ),
-  V(  75, 0x119c,  74,  76, 0 ),
-  V(  76, 0x0f6b,  74,  77, 0 ),
-  V(  77, 0x0d51,  75,  78, 0 ),
-  V(  78, 0x0bb6,  77,  79, 0 ),
-  V(  79, 0x0a40,  77,  48, 0 ),
-  V(  80, 0x5832,  80,  81, 1 ),
-  V(  81, 0x4d1c,  88,  82, 0 ),
-  V(  82, 0x438e,  89,  83, 0 ),
-  V(  83, 0x3bdd,  90,  84, 0 ),
-  V(  84, 0x34ee,  91,  85, 0 ),
-  V(  85, 0x2eae,  92,  86, 0 ),
-  V(  86, 0x299a,  93,  87, 0 ),
-  V(  87, 0x2516,  86,  71, 0 ),
-  V(  88, 0x5570,  88,  89, 1 ),
-  V(  89, 0x4ca9,  95,  90, 0 ),
-  V(  90, 0x44d9,  96,  91, 0 ),
-  V(  91, 0x3e22,  97,  92, 0 ),
-  V(  92, 0x3824,  99,  93, 0 ),
-  V(  93, 0x32b4,  99,  94, 0 ),
-  V(  94, 0x2e17,  93,  86, 0 ),
-  V(  95, 0x56a8,  95,  96, 1 ),
-  V(  96, 0x4f46, 101,  97, 0 ),
-  V(  97, 0x47e5, 102,  98, 0 ),
-  V(  98, 0x41cf, 103,  99, 0 ),
-  V(  99, 0x3c3d, 104, 100, 0 ),
-  V( 100, 0x375e,  99,  93, 0 ),
-  V( 101, 0x5231, 105, 102, 0 ),
-  V( 102, 0x4c0f, 106, 103, 0 ),
-  V( 103, 0x4639, 107, 104, 0 ),
-  V( 104, 0x415e, 103,  99, 0 ),
-  V( 105, 0x5627, 105, 106, 1 ),
-  V( 106, 0x50e7, 108, 107, 0 ),
-  V( 107, 0x4b85, 109, 103, 0 ),
-  V( 108, 0x5597, 110, 109, 0 ),
-  V( 109, 0x504f, 111, 107, 0 ),
-  V( 110, 0x5a10, 110, 111, 1 ),
-  V( 111, 0x5522, 112, 109, 0 ),
-  V( 112, 0x59eb, 112, 111, 1 ),
-/*
- * This last entry is used for fixed probability estimate of 0.5
- * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
- */
-  V( 113, 0x5a1d, 113, 113, 0 )
-};
diff --git a/dep/libjpeg/src/jcapimin.c b/dep/libjpeg/src/jcapimin.c
deleted file mode 100644
index 639ce86f4..000000000
--- a/dep/libjpeg/src/jcapimin.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * jcapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2003-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-compression case or the transcoding-only
- * case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jcapistd.c.  But also see jcparam.c for
- * parameter-setup helper routines, jcomapi.c for routines shared by
- * compression and decompression, and jctrans.c for the transcoding case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG compression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_compress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = FALSE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->dest = NULL;
-
-  cinfo->comp_info = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    cinfo->quant_tbl_ptrs[i] = NULL;
-    cinfo->q_scale_factor[i] = 100;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
-  cinfo->block_size = DCTSIZE;
-  cinfo->natural_order = jpeg_natural_order;
-  cinfo->lim_Se = DCTSIZE2-1;
-
-  cinfo->script_space = NULL;
-
-  cinfo->input_gamma = 1.0;	/* in case application forgets */
-
-  /* OK, I'm ready */
-  cinfo->global_state = CSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG compression object
- */
-
-GLOBAL(void)
-jpeg_destroy_compress (j_compress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG compression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_compress (j_compress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Forcibly suppress or un-suppress all quantization and Huffman tables.
- * Marks all currently defined tables as already written (if suppress)
- * or not written (if !suppress).  This will control whether they get emitted
- * by a subsequent jpeg_start_compress call.
- *
- * This routine is exported for use by applications that want to produce
- * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
- * since it is called by jpeg_start_compress, we put it here --- otherwise
- * jcparam.o would be linked whether the application used it or not.
- */
-
-GLOBAL(void)
-jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
-{
-  int i;
-  JQUANT_TBL * qtbl;
-  JHUFF_TBL * htbl;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
-      qtbl->sent_table = suppress;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-  }
-}
-
-
-/*
- * Finish JPEG compression.
- *
- * If a multipass operating mode was selected, this may do a great deal of
- * work including most of the actual output.
- */
-
-GLOBAL(void)
-jpeg_finish_compress (j_compress_ptr cinfo)
-{
-  JDIMENSION iMCU_row;
-
-  if (cinfo->global_state == CSTATE_SCANNING ||
-      cinfo->global_state == CSTATE_RAW_OK) {
-    /* Terminate first pass */
-    if (cinfo->next_scanline < cinfo->image_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_pass) (cinfo);
-  } else if (cinfo->global_state != CSTATE_WRCOEFS)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any remaining passes */
-  while (! cinfo->master->is_last_pass) {
-    (*cinfo->master->prepare_for_pass) (cinfo);
-    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) iMCU_row;
-	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* We bypass the main controller and invoke coef controller directly;
-       * all work is being done from the coefficient buffer.
-       */
-      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
-	ERREXIT(cinfo, JERR_CANT_SUSPEND);
-    }
-    (*cinfo->master->finish_pass) (cinfo);
-  }
-  /* Write EOI, do final cleanup */
-  (*cinfo->marker->write_file_trailer) (cinfo);
-  (*cinfo->dest->term_destination) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-}
-
-
-/*
- * Write a special marker.
- * This is only recommended for writing COM or APPn markers.
- * Must be called after jpeg_start_compress() and before
- * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
- */
-
-GLOBAL(void)
-jpeg_write_marker (j_compress_ptr cinfo, int marker,
-		   const JOCTET *dataptr, unsigned int datalen)
-{
-  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
-
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
-  while (datalen--) {
-    (*write_marker_byte) (cinfo, *dataptr);
-    dataptr++;
-  }
-}
-
-/* Same, but piecemeal. */
-
-GLOBAL(void)
-jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-{
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-}
-
-GLOBAL(void)
-jpeg_write_m_byte (j_compress_ptr cinfo, int val)
-{
-  (*cinfo->marker->write_marker_byte) (cinfo, val);
-}
-
-
-/*
- * Alternate compression function: just write an abbreviated table file.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * To produce a pair of files containing abbreviated tables and abbreviated
- * image data, one would proceed as follows:
- *
- *		initialize JPEG object
- *		set JPEG parameters
- *		set destination to table file
- *		jpeg_write_tables(cinfo);
- *		set destination to image file
- *		jpeg_start_compress(cinfo, FALSE);
- *		write data...
- *		jpeg_finish_compress(cinfo);
- *
- * jpeg_write_tables has the side effect of marking all tables written
- * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
- * will not re-emit the tables unless it is passed write_all_tables=TRUE.
- */
-
-GLOBAL(void)
-jpeg_write_tables (j_compress_ptr cinfo)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Initialize the marker writer ... bit of a crock to do it here. */
-  jinit_marker_writer(cinfo);
-  /* Write them tables! */
-  (*cinfo->marker->write_tables_only) (cinfo);
-  /* And clean up. */
-  (*cinfo->dest->term_destination) (cinfo);
-  /*
-   * In library releases up through v6a, we called jpeg_abort() here to free
-   * any working memory allocated by the destination manager and marker
-   * writer.  Some applications had a problem with that: they allocated space
-   * of their own from the library memory manager, and didn't want it to go
-   * away during write_tables.  So now we do nothing.  This will cause a
-   * memory leak if an app calls write_tables repeatedly without doing a full
-   * compression cycle or otherwise resetting the JPEG object.  However, that
-   * seems less bad than unexpectedly freeing memory in the normal case.
-   * An app that prefers the old behavior can call jpeg_abort for itself after
-   * each call to jpeg_write_tables().
-   */
-}
diff --git a/dep/libjpeg/src/jcapistd.c b/dep/libjpeg/src/jcapistd.c
deleted file mode 100644
index 0917afa97..000000000
--- a/dep/libjpeg/src/jcapistd.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * jcapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-compression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_compress, it will end up linking in the entire compressor.
- * We thus must separate this file from jcapimin.c to avoid linking the
- * whole compression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Compression initialization.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * We require a write_all_tables parameter as a failsafe check when writing
- * multiple datastreams from the same compression object.  Since prior runs
- * will have left all the tables marked sent_table=TRUE, a subsequent run
- * would emit an abbreviated stream (no tables) by default.  This may be what
- * is wanted, but for safety's sake it should not be the default behavior:
- * programmers should have to make a deliberate choice to emit abbreviated
- * images.  Therefore the documentation and examples should encourage people
- * to pass write_all_tables=TRUE; then it will take active thought to do the
- * wrong thing.
- */
-
-GLOBAL(void)
-jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (write_all_tables)
-    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  jinit_compress_master(cinfo);
-  /* Set up for the first pass */
-  (*cinfo->master->prepare_for_pass) (cinfo);
-  /* Ready for application to drive first pass through jpeg_write_scanlines
-   * or jpeg_write_raw_data.
-   */
-  cinfo->next_scanline = 0;
-  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
-}
-
-
-/*
- * Write some scanlines of data to the JPEG compressor.
- *
- * The return value will be the number of lines actually written.
- * This should be less than the supplied num_lines only in case that
- * the data destination module has requested suspension of the compressor,
- * or if more than image_height scanlines are passed in.
- *
- * Note: we warn about excess calls to jpeg_write_scanlines() since
- * this likely signals an application programmer error.  However,
- * excess scanlines passed in the last valid call are *silently* ignored,
- * so that the application need not adjust num_lines for end-of-image
- * when using a multiple-scanline buffer.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
-		      JDIMENSION num_lines)
-{
-  JDIMENSION row_ctr, rows_left;
-
-  if (cinfo->global_state != CSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height)
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_scanlines.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Ignore any extra scanlines at bottom of image. */
-  rows_left = cinfo->image_height - cinfo->next_scanline;
-  if (num_lines > rows_left)
-    num_lines = rows_left;
-
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
-  cinfo->next_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to write raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
-		     JDIMENSION num_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != CSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_raw_data.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Verify that at least one iMCU row has been passed. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
-  if (num_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Directly compress the row. */
-  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
-    /* If compressor did not consume the whole row, suspend processing. */
-    return 0;
-  }
-
-  /* OK, we processed one iMCU row. */
-  cinfo->next_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
diff --git a/dep/libjpeg/src/jcarith.c b/dep/libjpeg/src/jcarith.c
deleted file mode 100644
index 1b45089a3..000000000
--- a/dep/libjpeg/src/jcarith.c
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * jcarith.c
- *
- * Developed 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy encoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy encoder object for arithmetic encoding. */
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
-  INT32 a;               /* A register, normalized size of coding interval */
-  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
-  INT32 zc;          /* counter for pending 0x00 output values which might *
-                          * be discarded at the end ("Pacman" termination) */
-  int ct;  /* bit shift counter, determines when next byte will be written */
-  int buffer;                /* buffer for most recent output byte != 0xFF */
-
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_encoder;
-
-typedef arith_entropy_encoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-/* NOTE: Uncomment the following #define if you want to use the
- * given formula for calculating the AC conditioning parameter Kx
- * for spectral selection progressive coding in section G.1.3.2
- * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
- * Although the spec and P&M authors claim that this "has proven
- * to give good results for 8 bit precision samples", I'm not
- * convinced yet that this is really beneficial.
- * Early tests gave only very marginal compression enhancements
- * (a few - around 5 or so - bytes even for very large files),
- * which would turn out rather negative if we'd suppress the
- * DAC (Define Arithmetic Conditioning) marker segments for
- * the default parameters in the future.
- * Note that currently the marker writing module emits 12-byte
- * DAC segments for a full-component scan in a color image.
- * This is not worth worrying about IMHO. However, since the
- * spec defines the default values to be used if the tables
- * are omitted (unlike Huffman tables, which are required
- * anyway), one might optimize this behaviour in the future,
- * and then it would be disadvantageous to use custom tables if
- * they don't provide sufficient gain to exceed the DAC size.
- *
- * On the other hand, I'd consider it as a reasonable result
- * that the conditioning has no significant influence on the
- * compression performance. This means that the basic
- * statistical model is already rather stable.
- *
- * Thus, at the moment, we use the default conditioning values
- * anyway, and do not use the custom formula.
- *
-#define CALCULATE_SPECTRAL_CONDITIONING
- */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-LOCAL(void)
-emit_byte (int val, j_compress_ptr cinfo)
-/* Write next output byte; we do not support suspension in this module. */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *dest->next_output_byte++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0)
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-}
-
-
-/*
- * Finish up at the end of an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  INT32 temp;
-
-  /* Section D.1.8: Termination of encoding */
-
-  /* Find the e->c in the coding interval with the largest
-   * number of trailing zero bits */
-  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
-    e->c = temp + 0x8000L;
-  else
-    e->c = temp;
-  /* Send remaining bytes to output */
-  e->c <<= e->ct;
-  if (e->c & 0xF8000000L) {
-    /* One final overflow has to be handled */
-    if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer + 1, cinfo);
-      if (e->buffer + 1 == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-    e->sc = 0;
-  } else {
-    if (e->buffer == 0)
-      ++e->zc;
-    else if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer, cinfo);
-    }
-    if (e->sc) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      do {
-	emit_byte(0xFF, cinfo);
-	emit_byte(0x00, cinfo);
-      } while (--e->sc);
-    }
-  }
-  /* Output final bytes only if they are not 0x00 */
-  if (e->c & 0x7FFF800L) {
-    if (e->zc)  /* output final pending zero bytes */
-      do emit_byte(0x00, cinfo);
-      while (--e->zc);
-    emit_byte((int) ((e->c >> 19) & 0xFF), cinfo);
-    if (((e->c >> 19) & 0xFF) == 0xFF)
-      emit_byte(0x00, cinfo);
-    if (e->c & 0x7F800L) {
-      emit_byte((int) ((e->c >> 11) & 0xFF), cinfo);
-      if (((e->c >> 11) & 0xFF) == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-  }
-}
-
-
-/*
- * The core arithmetic encoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
- *
- * Note: I've added full "Pacman" termination support to the
- * byte output routines, which is equivalent to the optional
- * Discard_final_zeros procedure (Figure D.15) in the spec.
- * Thus, we always produce the shortest possible output
- * stream compliant to the spec (no trailing zero bytes,
- * except for FF stuffing).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(void)
-arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv;
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
-  e->a -= qe;
-  if (val != (sv >> 7)) {
-    /* Encode the less probable symbol */
-    if (e->a >= qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency, otherwise code the LPS
-       * as usual: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-  } else {
-    /* Encode the more probable symbol */
-    if (e->a >= 0x8000L)
-      return;  /* A >= 0x8000 -> ready, no renormalization required */
-    if (e->a < qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-  }
-
-  /* Renormalization & data output per section D.1.6 */
-  do {
-    e->a <<= 1;
-    e->c <<= 1;
-    if (--e->ct == 0) {
-      /* Another byte is ready for output */
-      temp = e->c >> 19;
-      if (temp > 0xFF) {
-	/* Handle overflow over all stacked 0xFF bytes */
-	if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer + 1, cinfo);
-	  if (e->buffer + 1 == 0xFF)
-	    emit_byte(0x00, cinfo);
-	}
-	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-	e->sc = 0;
-	/* Note: The 3 spacer bits in the C register guarantee
-	 * that the new buffer byte can't be 0xFF here
-	 * (see page 160 in the P&M JPEG book). */
-	/* New output byte, might overflow later */
-	e->buffer = (int) (temp & 0xFF);
-      } else if (temp == 0xFF) {
-	++e->sc;  /* stack 0xFF byte (which might overflow later) */
-      } else {
-	/* Output all stacked 0xFF bytes, they will not overflow any more */
-	if (e->buffer == 0)
-	  ++e->zc;
-	else if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer, cinfo);
-	}
-	if (e->sc) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  do {
-	    emit_byte(0xFF, cinfo);
-	    emit_byte(0x00, cinfo);
-	  } while (--e->sc);
-	}
-	/* New output byte (can still overflow) */
-	e->buffer = (int) (temp & 0xFF);
-      }
-      e->c &= 0x7FFFFL;
-      e->ct += 8;
-    }
-  } while (e->a < 0x8000L);
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(void)
-emit_restart (j_compress_ptr cinfo, int restart_num)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  finish_pass(cinfo);
-
-  emit_byte(0xFF, cinfo);
-  emit_byte(JPEG_RST0 + restart_num, cinfo);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int blkn, ci, tbl;
-  int v, v2, m;
-  ISHIFT_TEMPS
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    m = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = m - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = m;
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke;
-  int v, v2, m;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  ke = cinfo->Se;
-  do {
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-  } while (--ke);
-
-  /* Figure F.5: Encode_AC_Coefficients */
-  for (k = cinfo->Ss - 1; k < ke;) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 0);		/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[++k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 0);
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 1);
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0);
-      st += 3;
-    }
-    st += 2;
-    /* Figure F.8: Encoding the magnitude category of v */
-    m = 0;
-    if (v -= 1) {
-      arith_encode(cinfo, st, 1);
-      m = 1;
-      v2 = v;
-      if (v2 >>= 1) {
-	arith_encode(cinfo, st, 1);
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-    }
-    arith_encode(cinfo, st, 0);
-    /* Figure F.9: Encoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      arith_encode(cinfo, st, (m & v) ? 1 : 0);
-  }
-  /* Encode EOB decision only if k < cinfo->Se */
-  if (k < cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int Al, blkn;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  Al = cinfo->Al;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke, kex;
-  int v;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Section G.1.3.3: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  ke = cinfo->Se;
-  do {
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-  } while (--ke);
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  for (kex = ke; kex > 0; kex--)
-    if ((v = (*block)[natural_order[kex]]) >= 0) {
-      if (v >>= cinfo->Ah) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Ah) break;
-    }
-
-  /* Figure G.10: Encode_AC_Coefficients_SA */
-  for (k = cinfo->Ss - 1; k < ke;) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (k >= kex)
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[++k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 0);
-	  }
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 1);
-	  }
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0);
-      st += 3;
-    }
-  }
-  /* Encode EOB decision only if k < cinfo->Se */
-  if (k < cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke;
-  int v, v2, m;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = (*block)[0];
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-
-    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-    if ((ke = cinfo->lim_Se) == 0) continue;
-    tbl = compptr->ac_tbl_no;
-
-    /* Establish EOB (end-of-block) index */
-    do {
-      if ((*block)[natural_order[ke]]) break;
-    } while (--ke);
-
-    /* Figure F.5: Encode_AC_Coefficients */
-    for (k = 0; k < ke;) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-      while ((v = (*block)[natural_order[++k]]) == 0) {
-	arith_encode(cinfo, st + 1, 0);
-	st += 3;
-      }
-      arith_encode(cinfo, st + 1, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, entropy->fixed_bin, 0);
-      } else {
-	v = -v;
-	arith_encode(cinfo, entropy->fixed_bin, 1);
-      }
-      st += 2;
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	if (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (v2 >>= 1) {
-	    arith_encode(cinfo, st, 1);
-	    m <<= 1;
-	    st += 1;
-	  }
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-    /* Encode EOB decision only if k < cinfo->lim_Se */
-    if (k < cinfo->lim_Se) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 1);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    /* Make sure to avoid that in the master control logic!
-     * We are fully adaptive here and need no extra
-     * statistics gathering pass!
-     */
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-
-  /* We assume jcmaster.c already validated the progressive scan parameters. */
-
-  /* Select execution routines */
-  if (cinfo->progressive_mode) {
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-    }
-  } else
-    entropy->pub.encode_mcu = encode_mcu;
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-#ifdef CALCULATE_SPECTRAL_CONDITIONING
-      if (cinfo->progressive_mode)
-	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
-	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
-#endif
-    }
-  }
-
-  /* Initialize arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy encoding.
- */
-
-GLOBAL(void)
-jinit_arith_encoder (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_encoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass;
-  entropy->pub.finish_pass = finish_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-}
diff --git a/dep/libjpeg/src/jccoefct.c b/dep/libjpeg/src/jccoefct.c
deleted file mode 100644
index 494aa2298..000000000
--- a/dep/libjpeg/src/jccoefct.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * jccoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2003-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for compression.
- * This controller is the top level of the JPEG compressor proper.
- * The coefficient buffer lies between forward-DCT and entropy encoding steps.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* We use a full-image coefficient buffer when doing Huffman optimization,
- * and also for writing multiple-scan JPEG files.  In all cases, the DCT
- * step is run during the first pass, and subsequent passes need only read
- * the buffered coefficients.
- */
-#ifdef ENTROPY_OPT_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#else
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#endif
-#endif
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* For single-pass compression, it's sufficient to buffer just one MCU
-   * (although this may prove a bit slow in practice).
-   * We append a workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks,
-   * and reuse it for each MCU constructed and sent.
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays.
-   */
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-
-  /* Workspace for single-pass compression (omitted otherwise). */
-  JBLOCK blk_buffer[C_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-/* Forward declarations */
-METHODDEF(boolean) compress_data
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-METHODDEF(boolean) compress_first_pass
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-METHODDEF(boolean) compress_output
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (coef->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_data;
-    break;
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_first_pass;
-    break;
-  case JBUF_CRANK_DEST:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_output;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-  }
-}
-
-
-/*
- * Process some data in the single-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(boolean)
-compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int ci, xindex, yindex, yoffset, blockcnt;
-  JBLOCKROW blkp;
-  JSAMPARRAY input_ptr;
-  JDIMENSION xpos;
-  jpeg_component_info *compptr;
-  forward_DCT_ptr forward_DCT;
-
-  /* Loop to write as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Determine where data comes from in input_buf and do the DCT thing.
-       * Each call on forward_DCT processes a horizontal row of DCT blocks as
-       * wide as an MCU.  Dummy blocks at the right or bottom edge are filled in
-       * specially.  The data in them does not matter for image reconstruction,
-       * so we fill them with values that will encode to the smallest amount of
-       * data, viz: all zeroes in the AC entries, DC entries equal to previous
-       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
-       */
-      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index];
-	input_ptr = input_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_v_scaled_size;
-	/* ypos == (yoffset + yindex) * compptr->DCT_v_scaled_size */
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	xpos = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    (*forward_DCT) (cinfo, compptr, input_ptr, blkp,
-			    xpos, (JDIMENSION) blockcnt);
-	    input_ptr += compptr->DCT_v_scaled_size;
-	    blkp += blockcnt;
-	    /* Dummy blocks at right edge */
-	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
-	      continue;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = compptr->MCU_width;
-	  }
-	  /* Fill in any dummy blocks needed in this row */
-	  MEMZERO(blkp, xindex * SIZEOF(JBLOCK));
-	  do {
-	    blkp[0][0] = blkp[-1][0];
-	    blkp++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU.  In event of a suspension failure, we will
-       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
-       */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-
-/*
- * Process some data in the first pass of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * This amount of data is read from the source buffer, DCT'd and quantized,
- * and saved into the virtual arrays.  We also generate suitable dummy blocks
- * as needed at the right and lower edges.  (The dummy blocks are constructed
- * in the virtual arrays, which have been padded appropriately.)  This makes
- * it possible for subsequent passes not to worry about real vs. dummy blocks.
- *
- * We must also emit the data to the entropy encoder.  This is conveniently
- * done by calling compress_output() after we've loaded the current strip
- * of the virtual arrays.
- *
- * NB: input_buf contains a plane for each component in image.  All
- * components are DCT'd and loaded into the virtual arrays in this pass.
- * However, it may be that only a subset of the components are emitted to
- * the entropy encoder during this first pass; be careful about looking
- * at the scan-dependent variables (MCU dimensions, etc).
- */
-
-METHODDEF(boolean)
-compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION blocks_across, MCUs_across, MCUindex;
-  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
-  JCOEF lastDC;
-  jpeg_component_info *compptr;
-  JBLOCKARRAY buffer;
-  JBLOCKROW thisblockrow, lastblockrow;
-  JSAMPARRAY input_ptr;
-  forward_DCT_ptr forward_DCT;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (coef->iMCU_row_num < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here, since may not be set! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    blocks_across = compptr->width_in_blocks;
-    h_samp_factor = compptr->h_samp_factor;
-    /* Count number of dummy blocks to be added at the right margin. */
-    ndummy = (int) (blocks_across % h_samp_factor);
-    if (ndummy > 0)
-      ndummy = h_samp_factor - ndummy;
-    forward_DCT = cinfo->fdct->forward_DCT[ci];
-    input_ptr = input_buf[ci];
-    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
-     * on forward_DCT processes a complete horizontal row of DCT blocks.
-     */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      thisblockrow = buffer[block_row];
-      (*forward_DCT) (cinfo, compptr, input_ptr, thisblockrow,
-		      (JDIMENSION) 0, blocks_across);
-      input_ptr += compptr->DCT_v_scaled_size;
-      if (ndummy > 0) {
-	/* Create dummy blocks at the right edge of the image. */
-	thisblockrow += blocks_across; /* => first dummy block */
-	FMEMZERO((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
-	lastDC = thisblockrow[-1][0];
-	for (bi = 0; bi < ndummy; bi++) {
-	  thisblockrow[bi][0] = lastDC;
-	}
-      }
-    }
-    /* If at end of image, create dummy block rows as needed.
-     * The tricky part here is that within each MCU, we want the DC values
-     * of the dummy blocks to match the last real block's DC value.
-     * This squeezes a few more bytes out of the resulting file...
-     */
-    if (block_row < compptr->v_samp_factor) {
-      blocks_across += ndummy;	/* include lower right corner */
-      MCUs_across = blocks_across / h_samp_factor;
-      do {
-	thisblockrow = buffer[block_row];
-	lastblockrow = buffer[block_row-1];
-	FMEMZERO((void FAR *) thisblockrow,
-		 (size_t) blocks_across * SIZEOF(JBLOCK));
-	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
-	  lastDC = lastblockrow[h_samp_factor-1][0];
-	  for (bi = 0; bi < h_samp_factor; bi++) {
-	    thisblockrow[bi][0] = lastDC;
-	  }
-	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
-	  lastblockrow += h_samp_factor;
-	}
-      } while (++block_row < compptr->v_samp_factor);
-    }
-  }
-  /* NB: compress_output will increment iMCU_row_num if successful.
-   * A suspension return will result in redoing all the work above next time.
-   */
-
-  /* Emit data to the entropy encoder, sharing code with subsequent passes */
-  return compress_output(cinfo, input_buf);
-}
-
-
-/*
- * Process some data in subsequent passes of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY blkp;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan.
-   * NB: during first pass, this is safe only because the buffers will
-   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
-   */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	  xindex = compptr->MCU_width;
-	  do {
-	    *blkp++ = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-#endif /* FULL_COEF_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  if (need_full_buffer) {
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    int ci;
-    jpeg_component_info *compptr;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKARRAY blkp;
-    JBLOCKROW buffer_ptr;
-    int bi;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-    blkp = coef->MCU_buffer;
-    buffer_ptr = coef->blk_buffer;
-    bi = C_MAX_BLOCKS_IN_MCU;
-    do {
-      *blkp++ = buffer_ptr++;
-    } while (--bi);
-    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
-  }
-
-  coef->pub.start_pass = start_pass_coef;
-  cinfo->coef = &coef->pub;
-}
diff --git a/dep/libjpeg/src/jccolor.c b/dep/libjpeg/src/jccolor.c
deleted file mode 100644
index c028dd9db..000000000
--- a/dep/libjpeg/src/jccolor.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * jccolor.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_converter pub; /* public fields */
-
-  /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
-} my_color_converter;
-
-typedef my_color_converter * my_cconvert_ptr;
-
-
-/**************** RGB -> YCbCr conversion: most common case **************/
-
-/*
- * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
- * previously known as Recommendation CCIR 601-1, except that Cb and Cr
- * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
- * sYCC (standard luma-chroma-chroma color space with extended gamut)
- * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
- * bg-sRGB and bg-sYCC (big gamut standard color spaces)
- * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
- * Note that the derived conversion coefficients given in some of these
- * documents are imprecise.  The general conversion equations are
- *	Y  = Kr * R + (1 - Kr - Kb) * G + Kb * B
- *	Cb = (B - Y) / (1 - Kb) / K
- *	Cr = (R - Y) / (1 - Kr) / K
- * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
- * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
- * the conversion equations to be implemented are therefore
- *	Y  =  0.299 * R + 0.587 * G + 0.114 * B
- *	Cb = -0.168735892 * R - 0.331264108 * G + 0.5 * B + CENTERJSAMPLE
- *	Cr =  0.5 * R - 0.418687589 * G - 0.081312411 * B + CENTERJSAMPLE
- * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
- * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
- * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
- * were not represented exactly.  Now we sacrifice exact representation of
- * maximum red and maximum blue in order to get exact grayscales.
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times R,G,B for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 9-bit to 12-bit samples it is still acceptable.  It's not very
- * reasonable for 16-bit samples, but if you want lossless storage
- * you shouldn't be changing colorspace anyway.
- * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
- * in the tables to save adding them separately in the inner loop.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-/* We allocate one big table and divide it up into eight parts, instead of
- * doing eight alloc_small requests.  This lets us use a single table base
- * address, which can be held in a register in the inner loops on many
- * machines (more than can hold all eight addresses, anyway).
- */
-
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define R_CB_OFF	(3*(MAXJSAMPLE+1))
-#define G_CB_OFF	(4*(MAXJSAMPLE+1))
-#define B_CB_OFF	(5*(MAXJSAMPLE+1))
-#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF	(6*(MAXJSAMPLE+1))
-#define B_CR_OFF	(7*(MAXJSAMPLE+1))
-#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
-
-
-/*
- * Initialize for RGB->YCC colorspace conversion.
- */
-
-METHODDEF(void)
-rgb_ycc_start (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_ycc_tab;
-  INT32 i;
-
-  /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				TABLE_SIZE * SIZEOF(INT32));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.299) * i;
-    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.587) * i;
-    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.114) * i + ONE_HALF;
-    rgb_ycc_tab[i+R_CB_OFF] = (- FIX(0.168735892)) * i;
-    rgb_ycc_tab[i+G_CB_OFF] = (- FIX(0.331264108)) * i;
-    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
-     * This ensures that the maximum output will round to MAXJSAMPLE
-     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
-     */
-    rgb_ycc_tab[i+B_CB_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
-/*  B=>Cb and R=>Cr tables are the same
-    rgb_ycc_tab[i+R_CR_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
-*/
-    rgb_ycc_tab[i+G_CR_OFF] = (- FIX(0.418687589)) * i;
-    rgb_ycc_tab[i+B_CR_OFF] = (- FIX(0.081312411)) * i;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- *
- * Note that we change from the application's interleaved-pixel format
- * to our internal noninterleaved, one-plane-per-component format.  The
- * input buffer is therefore three times as wide as the output buffer.
- *
- * A starting row offset is provided only for the output buffer.  The
- * caller can easily adjust the passed input_buf value to accommodate
- * any row offset required on that side.
- */
-
-METHODDEF(void)
-rgb_ycc_convert (j_compress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		 JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/**************** Cases other than RGB -> YCbCr **************/
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles RGB->grayscale conversion,
- * which is the same as the RGB->Y portion of RGB->YCbCr.
- * We assume rgb_ycc_start has been called (we only use the Y tables).
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row++];
-    for (col = 0; col < num_cols; col++) {
-      y  = ctab[R_Y_OFF + GETJSAMPLE(inptr[RGB_RED])];
-      y += ctab[G_Y_OFF + GETJSAMPLE(inptr[RGB_GREEN])];
-      y += ctab[B_Y_OFF + GETJSAMPLE(inptr[RGB_BLUE])];
-      inptr += RGB_PIXELSIZE;
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles Adobe-style CMYK->YCCK conversion,
- * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the
- * same conversion as above, while passing K (black) unchanged.
- * We assume rgb_ycc_start has been called.
- */
-
-METHODDEF(void)
-cmyk_ycck_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    outptr3 = output_buf[3][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
-      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
-      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
-      /* K passes through as-is */
-      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
-      inptr += 4;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
- * (forward reversible color transform).
- * This can be seen as an adaption of the general RGB->YCbCr
- * conversion equation with Kr = Kb = 0, while replacing the
- * normalization by modulo calculation.
- */
-
-METHODDEF(void)
-rgb_rgb1_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
-{
-  register int r, g, b;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      outptr0[col] = (JSAMPLE) ((r - g + CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr1[col] = (JSAMPLE) g;
-      outptr2[col] = (JSAMPLE) ((b - g + CENTERJSAMPLE) & MAXJSAMPLE);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles grayscale output with no conversion.
- * The source can be either plain grayscale or YCC (since Y == gray).
- */
-
-METHODDEF(void)
-grayscale_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION count;
-  register int instride = cinfo->input_components;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row++];
-    for (count = num_cols; count > 0; count--) {
-      *outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
-      inptr += instride;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * No colorspace conversion, but change from interleaved
- * to separate-planes representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_compress_ptr cinfo,
-	     JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	     JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr0[col] = inptr[RGB_RED];
-      outptr1[col] = inptr[RGB_GREEN];
-      outptr2[col] = inptr[RGB_BLUE];
-      inptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles multi-component colorspaces without conversion.
- * We assume input_components == num_components.
- */
-
-METHODDEF(void)
-null_convert (j_compress_ptr cinfo,
-	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	      JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION count;
-  register int num_comps = cinfo->num_components;
-  JDIMENSION num_cols = cinfo->image_width;
-  int ci;
-
-  while (--num_rows >= 0) {
-    /* It seems fastest to make a separate pass for each component. */
-    for (ci = 0; ci < num_comps; ci++) {
-      inptr = input_buf[0] + ci;
-      outptr = output_buf[ci][output_row];
-      for (count = num_cols; count > 0; count--) {
-	*outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
-	inptr += num_comps;
-      }
-    }
-    input_buf++;
-    output_row++;
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-null_method (j_compress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for input colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_converter (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-
-  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_converter));
-  cinfo->cconvert = &cconvert->pub;
-  /* set start_pass to null method until we find out differently */
-  cconvert->pub.start_pass = null_method;
-
-  /* Make sure input_components agrees with in_color_space */
-  switch (cinfo->in_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->input_components != 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-  case JCS_BG_RGB:
-#if RGB_PIXELSIZE != 3
-    if (cinfo->input_components != RGB_PIXELSIZE)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-#endif /* else share code with YCbCr */
-
-  case JCS_YCbCr:
-  case JCS_BG_YCC:
-    if (cinfo->input_components != 3)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->input_components != 4)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->input_components < 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-  }
-
-  /* Support color transform only for RGB colorspaces */
-  if (cinfo->color_transform &&
-      cinfo->jpeg_color_space != JCS_RGB &&
-      cinfo->jpeg_color_space != JCS_BG_RGB)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  /* Check num_components, set conversion method based on requested space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_GRAYSCALE:
-    case JCS_YCbCr:
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = grayscale_convert;
-      break;
-    case JCS_RGB:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_gray_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_RGB:
-  case JCS_BG_RGB:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space != cinfo->jpeg_color_space)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    switch (cinfo->color_transform) {
-    case JCT_NONE:
-      cconvert->pub.color_convert = rgb_convert;
-      break;
-    case JCT_SUBTRACT_GREEN:
-      cconvert->pub.color_convert = rgb_rgb1_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_YCbCr:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_RGB:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-      break;
-    case JCS_YCbCr:
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_BG_YCC:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_RGB:
-      /* For conversion from normal RGB input to BG_YCC representation,
-       * the Cb/Cr values are first computed as usual, and then
-       * quantized further after DCT processing by a factor of
-       * 2 in reference to the nominal quantization factor.
-       */
-      /* need quantization scale by factor of 2 after DCT */
-      cinfo->comp_info[1].component_needed = TRUE;
-      cinfo->comp_info[2].component_needed = TRUE;
-      /* compute normal YCC first */
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-      break;
-    case JCS_YCbCr:
-      /* need quantization scale by factor of 2 after DCT */
-      cinfo->comp_info[1].component_needed = TRUE;
-      cinfo->comp_info[2].component_needed = TRUE;
-      /*FALLTHROUGH*/
-    case JCS_BG_YCC:
-      /* Pass through for BG_YCC input */
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_CMYK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space != JCS_CMYK)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
-    break;
-
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_CMYK:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = cmyk_ycck_convert;
-      break;
-    case JCS_YCCK:
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  default:			/* allow null conversion of JCS_UNKNOWN */
-    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
-	cinfo->num_components != cinfo->input_components)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
-  }
-}
diff --git a/dep/libjpeg/src/jcdctmgr.c b/dep/libjpeg/src/jcdctmgr.c
deleted file mode 100644
index a48ccd814..000000000
--- a/dep/libjpeg/src/jcdctmgr.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * jcdctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the forward-DCT management logic.
- * This code selects a particular DCT implementation to be used,
- * and it performs related housekeeping chores including coefficient
- * quantization.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_forward_dct pub;	/* public fields */
-
-  /* Pointer to the DCT routine actually in use */
-  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
-
-#ifdef DCT_FLOAT_SUPPORTED
-  /* Same as above for the floating-point case. */
-  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
-#endif
-} my_fdct_controller;
-
-typedef my_fdct_controller * my_fdct_ptr;
-
-
-/* The allocated post-DCT divisor tables -- big enough for any
- * supported variant and not identical to the quant table entries,
- * because of scaling (especially for an unnormalized DCT) --
- * are pointed to by dct_table in the per-component comp_info
- * structures.  Each table is given in normal array order.
- */
-
-typedef union {
-  DCTELEM int_array[DCTSIZE2];
-#ifdef DCT_FLOAT_SUPPORTED
-  FAST_FLOAT float_array[DCTSIZE2];
-#endif
-} divisor_table;
-
-
-/* The current scaled-DCT routines require ISLOW-style divisor tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef DCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Perform forward DCT on one or more blocks of a component.
- *
- * The input samples are taken from the sample_data[] array starting at
- * position start_col, and moving to the right for any additional blocks.
- * The quantized coefficients are returned in coef_blocks[].
- */
-
-METHODDEF(void)
-forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
-	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-	     JDIMENSION start_col, JDIMENSION num_blocks)
-/* This version is used for integer DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
-  DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register DCTELEM temp, qval;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	qval = divisors[i];
-	temp = workspace[i];
-	/* Divide the coefficient value by qval, ensuring proper rounding.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 *
-	 * In most files, at least half of the output values will be zero
-	 * (at default quantization settings, more like three-quarters...)
-	 * so we should ensure that this case is fast.  On many machines,
-	 * a comparison is enough cheaper than a divide to make a special test
-	 * a win.  Since both inputs will be nonnegative, we need only test
-	 * for a < b to discover whether a/b is 0.
-	 * If your machine's division is fast enough, define FAST_DIVIDE.
-	 */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b)	a /= b
-#else
-#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
-#endif
-	if (temp < 0) {
-	  temp = -temp;
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	  temp = -temp;
-	} else {
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	}
-	output_ptr[i] = (JCOEF) temp;
-      }
-    }
-  }
-}
-
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-METHODDEF(void)
-forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		   JDIMENSION start_col, JDIMENSION num_blocks)
-/* This version is used for floating-point DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
-  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register FAST_FLOAT temp;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	/* Apply the quantization and scaling factor */
-	temp = workspace[i] * divisors[i];
-	/* Round to nearest integer.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 * The maximum coefficient size is +-16K (for 12-bit data), so this
-	 * code should work for either 16-bit or 32-bit ints.
-	 */
-	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
-      }
-    }
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
-
-
-/*
- * Initialize for a processing pass.
- * Verify that all referenced Q-tables are present, and set up
- * the divisor table for each one.
- * In the current implementation, DCT of all components is done during
- * the first pass, even if only some components will be output in the
- * first scan.  Hence all components should be examined here.
- */
-
-METHODDEF(void)
-start_pass_fdctmgr (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  int ci, qtblno, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  JQUANT_TBL * qtbl;
-  DCTELEM * dtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper DCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef DCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_1x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_2x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_3x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_4x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_5x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_6x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_7x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      fdct->do_dct[ci] = jpeg_fdct_9x9;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_10x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      fdct->do_dct[ci] = jpeg_fdct_11x11;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_12x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      fdct->do_dct[ci] = jpeg_fdct_13x13;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_14x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      fdct->do_dct[ci] = jpeg_fdct_15x15;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_16x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_16x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_14x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_12x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_10x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_8x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_6x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_4x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_2x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_8x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_7x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_6x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_5x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_4x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_3x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_2x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_1x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	fdct->do_dct[ci] = jpeg_fdct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	fdct->do_dct[ci] = jpeg_fdct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	fdct->do_float_dct[ci] = jpeg_fdct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-    }
-    qtblno = compptr->quant_tbl_no;
-    /* Make sure specified quantization table is present */
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    qtbl = cinfo->quant_tbl_ptrs[qtblno];
-    /* Create divisor table from quant table */
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      /* For LL&M IDCT method, divisors are equal to raw quantization
-       * coefficients multiplied by 8 (to counteract scaling).
-       */
-      dtbl = (DCTELEM *) compptr->dct_table;
-      for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] =
-	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 */
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	dtbl = (DCTELEM *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  dtbl[i] = (DCTELEM)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 * What's actually stored is 1/divisor so that the inner loop can
-	 * use a multiplication rather than a division.
-	 */
-	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / ((double) qtbl->quantval[i] *
-		      aanscalefactor[row] * aanscalefactor[col] *
-		      (compptr->component_needed ? 16.0 : 8.0)));
-	    i++;
-	  }
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT_float;
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-    }
-  }
-}
-
-
-/*
- * Initialize FDCT manager.
- */
-
-GLOBAL(void)
-jinit_forward_dct (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct;
-  int ci;
-  jpeg_component_info *compptr;
-
-  fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_fdct_controller));
-  cinfo->fdct = &fdct->pub;
-  fdct->pub.start_pass = start_pass_fdctmgr;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate a divisor table for each component */
-    compptr->dct_table = (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(divisor_table));
-  }
-}
diff --git a/dep/libjpeg/src/jchuff.c b/dep/libjpeg/src/jchuff.c
deleted file mode 100644
index 1f527b218..000000000
--- a/dep/libjpeg/src/jchuff.c
+++ /dev/null
@@ -1,1656 +0,0 @@
-/*
- * jchuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy encoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting output suspension.
- * If the data destination module demands suspension, we want to be able to
- * back up to the start of the current MCU.  To do this, we copy state
- * variables into local working storage, and update them back to the
- * permanent JPEG objects only upon successful completion of an MCU.
- *
- * We do not support output suspension for the progressive JPEG mode, since
- * the library currently does not allow multiple-scan files to be written
- * with output suspension.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* The legal range of a DCT coefficient is
- *  -1024 .. +1023  for 8-bit sample data precision;
- * -16384 .. +16383 for 12-bit sample data precision.
- * Hence the magnitude should always fit in sample data precision + 2 bits.
- */
-
-/* Derived data constructed for each Huffman table */
-
-typedef struct {
-  unsigned int ehufco[256];	/* code for each symbol */
-  char ehufsi[256];		/* length of code for each symbol */
-  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
-} c_derived_tbl;
-
-
-/* Expanded entropy encoder object for Huffman encoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).put_buffer = (src).put_buffer, \
-	 (dest).put_bits = (src).put_bits, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  savable_state saved;		/* Bit buffer & DC state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Statistics tables for optimization */
-  long * dc_count_ptrs[NUM_HUFF_TBLS];
-  long * ac_count_ptrs[NUM_HUFF_TBLS];
-
-  /* Following fields used only in progressive mode */
-
-  /* Mode flag: TRUE for optimization, FALSE for actual data output */
-  boolean gather_statistics;
-
-  /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
-   */
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
-
-  /* Coding status for AC components */
-  int ac_tbl_no;		/* the table number of the single component */
-  unsigned int EOBRUN;		/* run length of EOBs */
-  unsigned int BE;		/* # of buffered correction bits before MCU */
-  char * bit_buffer;		/* buffer for correction bits (1 per char) */
-  /* packing correction bits tightly would save some space but cost time... */
-} huff_entropy_encoder;
-
-typedef huff_entropy_encoder * huff_entropy_ptr;
-
-/* Working state while writing an MCU (sequential mode).
- * This struct contains all the fields that are needed by subroutines.
- */
-
-typedef struct {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  savable_state cur;		/* Current bit buffer & DC state */
-  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
-} working_state;
-
-/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
- * buffer can hold.  Larger sizes may slightly improve compression, but
- * 1000 is already well into the realm of overkill.
- * The minimum safe size is 64 bits.
- */
-
-#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
-			 c_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  c_derived_tbl *dtbl;
-  int p, i, l, lastp, si, maxsymbol;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (c_derived_tbl *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(c_derived_tbl));
-  dtbl = *pdtbl;
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  lastp = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-  
-  /* Figure C.3: generate encoding tables */
-  /* These are code and size indexed by symbol value */
-
-  /* Set all codeless symbols to have code length 0;
-   * this lets us detect duplicate VAL entries here, and later
-   * allows emit_bits to detect any attempt to emit such symbols.
-   */
-  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
-
-  /* This is also a convenient place to check for out-of-range
-   * and duplicated VAL entries.  We allow 0..255 for AC symbols
-   * but only 0..15 for DC.  (We could constrain them further
-   * based on data depth and mode, but this seems enough.)
-   */
-  maxsymbol = isDC ? 15 : 255;
-
-  for (p = 0; p < lastp; p++) {
-    i = htbl->huffval[p];
-    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    dtbl->ehufco[i] = huffcode[p];
-    dtbl->ehufsi[i] = huffsize[p];
-  }
-}
-
-
-/* Outputting bytes to the file.
- * NB: these must be called only when actually outputting,
- * that is, entropy->gather_statistics == FALSE.
- */
-
-/* Emit a byte, taking 'action' if must suspend. */
-#define emit_byte_s(state,val,action)  \
-	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(state)->free_in_buffer == 0)  \
-	    if (! dump_buffer_s(state))  \
-	      { action; } }
-
-/* Emit a byte */
-#define emit_byte_e(entropy,val)  \
-	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(entropy)->free_in_buffer == 0)  \
-	    dump_buffer_e(entropy); }
-
-
-LOCAL(boolean)
-dump_buffer_s (working_state * state)
-/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
-{
-  struct jpeg_destination_mgr * dest = state->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (state->cinfo))
-    return FALSE;
-  /* After a successful buffer dump, must reset buffer pointers */
-  state->next_output_byte = dest->next_output_byte;
-  state->free_in_buffer = dest->free_in_buffer;
-  return TRUE;
-}
-
-
-LOCAL(void)
-dump_buffer_e (huff_entropy_ptr entropy)
-/* Empty the output buffer; we do not support suspension in this case. */
-{
-  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (entropy->cinfo))
-    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
-  /* After a successful buffer dump, must reset buffer pointers */
-  entropy->next_output_byte = dest->next_output_byte;
-  entropy->free_in_buffer = dest->free_in_buffer;
-}
-
-
-/* Outputting bits to the file */
-
-/* Only the right 24 bits of put_buffer are used; the valid bits are
- * left-justified in this part.  At most 16 bits can be passed to emit_bits
- * in one call, and we never retain more than 7 bits in put_buffer
- * between calls, so 24 bits are sufficient.
- */
-
-INLINE
-LOCAL(boolean)
-emit_bits_s (working_state * state, unsigned int code, int size)
-/* Emit some bits; return TRUE if successful, FALSE if must suspend */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer;
-  register int put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
-
-  /* mask off any extra bits in code */
-  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
-
-  /* new number of bits in buffer */
-  put_bits = size + state->cur.put_bits;
-
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  /* and merge with old buffer contents */
-  put_buffer |= state->cur.put_buffer;
-
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-
-    emit_byte_s(state, c, return FALSE);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_s(state, 0, return FALSE);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  state->cur.put_buffer = put_buffer; /* update state variables */
-  state->cur.put_bits = put_bits;
-
-  return TRUE;
-}
-
-
-INLINE
-LOCAL(void)
-emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size)
-/* Emit some bits, unless we are in gather mode */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer;
-  register int put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-  if (entropy->gather_statistics)
-    return;			/* do nothing if we're only getting stats */
-
-  /* mask off any extra bits in code */
-  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
-
-  /* new number of bits in buffer */
-  put_bits = size + entropy->saved.put_bits;
-
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  /* and merge with old buffer contents */
-  put_buffer |= entropy->saved.put_buffer;
-
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-
-    emit_byte_e(entropy, c);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_e(entropy, 0);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  entropy->saved.put_buffer = put_buffer; /* update variables */
-  entropy->saved.put_bits = put_bits;
-}
-
-
-LOCAL(boolean)
-flush_bits_s (working_state * state)
-{
-  if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */
-    return FALSE;
-  state->cur.put_buffer = 0;	     /* and reset bit-buffer to empty */
-  state->cur.put_bits = 0;
-  return TRUE;
-}
-
-
-LOCAL(void)
-flush_bits_e (huff_entropy_ptr entropy)
-{
-  emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */
-  entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */
-  entropy->saved.put_bits = 0;
-}
-
-
-/*
- * Emit (or just count) a Huffman symbol.
- */
-
-INLINE
-LOCAL(void)
-emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->dc_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-INLINE
-LOCAL(void)
-emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->ac_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-/*
- * Emit bits from a correction bit buffer.
- */
-
-LOCAL(void)
-emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart,
-		    unsigned int nbits)
-{
-  if (entropy->gather_statistics)
-    return;			/* no real work */
-
-  while (nbits > 0) {
-    emit_bits_e(entropy, (unsigned int) (*bufstart), 1);
-    bufstart++;
-    nbits--;
-  }
-}
-
-
-/*
- * Emit any pending EOBRUN symbol.
- */
-
-LOCAL(void)
-emit_eobrun (huff_entropy_ptr entropy)
-{
-  register int temp, nbits;
-
-  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
-    temp = entropy->EOBRUN;
-    nbits = 0;
-    while ((temp >>= 1))
-      nbits++;
-    /* safety check: shouldn't happen given limited correction-bit buffer */
-    if (nbits > 14)
-      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
-    if (nbits)
-      emit_bits_e(entropy, entropy->EOBRUN, nbits);
-
-    entropy->EOBRUN = 0;
-
-    /* Emit any buffered correction bits */
-    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(boolean)
-emit_restart_s (working_state * state, int restart_num)
-{
-  int ci;
-
-  if (! flush_bits_s(state))
-    return FALSE;
-
-  emit_byte_s(state, 0xFF, return FALSE);
-  emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE);
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
-    state->cur.last_dc_val[ci] = 0;
-
-  /* The restart counter is not updated until we successfully write the MCU. */
-
-  return TRUE;
-}
-
-
-LOCAL(void)
-emit_restart_e (huff_entropy_ptr entropy, int restart_num)
-{
-  int ci;
-
-  emit_eobrun(entropy);
-
-  if (! entropy->gather_statistics) {
-    flush_bits_e(entropy);
-    emit_byte_e(entropy, 0xFF);
-    emit_byte_e(entropy, JPEG_RST0 + restart_num);
-  }
-
-  if (entropy->cinfo->Ss == 0) {
-    /* Re-initialize DC predictions to 0 */
-    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
-      entropy->saved.last_dc_val[ci] = 0;
-  } else {
-    /* Re-initialize all AC-related fields to 0 */
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp, temp2;
-  register int nbits;
-  int max_coef_bits;
-  int blkn, ci, tbl;
-  ISHIFT_TEMPS
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  /* Since we're encoding a difference, the range limit is twice as much. */
-  max_coef_bits = cinfo->data_precision + 3;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    temp = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
-
-    /* DC differences are figured on the point-transformed values. */
-    if ((temp2 = temp - entropy->saved.last_dc_val[ci]) == 0) {
-      /* Count/emit the Huffman-coded symbol for the number of bits */
-      emit_dc_symbol(entropy, tbl, 0);
-
-      continue;
-    }
-
-    entropy->saved.last_dc_val[ci] = temp;
-
-    /* Encode the DC coefficient difference per section G.1.2.1 */
-    if ((temp = temp2) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative input, want temp2 = bitwise complement of abs(input) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count/emit the Huffman-coded symbol for the number of bits */
-    emit_dc_symbol(entropy, tbl, nbits);
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    emit_bits_e(entropy, (unsigned int) temp2, nbits);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  register int temp, temp2;
-  register int nbits;
-  register int r, k;
-  int Se, Al, max_coef_bits;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-  max_coef_bits = cinfo->data_precision + 2;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
-  
-  r = 0;			/* r = run length of zeros */
-   
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = (*block)[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value; so the code is
-     * interwoven with finding the abs value (temp) and output bits (temp2).
-     */
-    if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* Apply the point transform, and watch out for case */
-      /* that nonzero coef is zero after point transform. */
-      if ((temp >>= Al) == 0) {
-	r++;
-	continue;
-      }
-      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
-      temp2 = ~temp;
-    } else {
-      /* Apply the point transform, and watch out for case */
-      /* that nonzero coef is zero after point transform. */
-      if ((temp >>= Al) == 0) {
-	r++;
-	continue;
-      }
-      temp2 = temp;
-    }
-
-    /* Emit any pending EOBRUN */
-    if (entropy->EOBRUN > 0)
-      emit_eobrun(entropy);
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    emit_bits_e(entropy, (unsigned int) temp2, nbits);
-
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0) {			/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    if (entropy->EOBRUN == 0x7FFF)
-      emit_eobrun(entropy);	/* force it out to avoid overflow */
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int Al, blkn;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Al = cinfo->Al;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    emit_bits_e(entropy, (unsigned int) (MCU_data[blkn][0][0] >> Al), 1);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  register int temp;
-  register int r, k;
-  int Se, Al;
-  int EOB;
-  char *BR_buffer;
-  unsigned int BR;
-  int absvalues[DCTSIZE2];
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* It is convenient to make a pre-pass to determine the transformed
-   * coefficients' absolute values and the EOB position.
-   */
-  EOB = 0;
-  for (k = cinfo->Ss; k <= Se; k++) {
-    temp = (*block)[natural_order[k]];
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-    temp >>= Al;		/* apply the point transform */
-    absvalues[k] = temp;	/* save abs value for main pass */
-    if (temp == 1)
-      EOB = k;			/* EOB = index of last newly-nonzero coef */
-  }
-
-  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
-  
-  r = 0;			/* r = run length of zeros */
-  BR = 0;			/* BR = count of buffered bits added now */
-  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
-
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = absvalues[k]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* Emit any required ZRLs, but not if they can be folded into EOB */
-    while (r > 15 && k <= EOB) {
-      /* emit any pending EOBRUN and the BE correction bits */
-      emit_eobrun(entropy);
-      /* Emit ZRL */
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-      /* Emit buffered correction bits that must be associated with ZRL */
-      emit_buffered_bits(entropy, BR_buffer, BR);
-      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-      BR = 0;
-    }
-
-    /* If the coef was previously nonzero, it only needs a correction bit.
-     * NOTE: a straight translation of the spec's figure G.7 would suggest
-     * that we also need to test r > 15.  But if r > 15, we can only get here
-     * if k > EOB, which implies that this coefficient is not 1.
-     */
-    if (temp > 1) {
-      /* The correction bit is the next bit of the absolute value. */
-      BR_buffer[BR++] = (char) (temp & 1);
-      continue;
-    }
-
-    /* Emit any pending EOBRUN and the BE correction bits */
-    emit_eobrun(entropy);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
-
-    /* Emit output bit for newly-nonzero coef */
-    temp = ((*block)[natural_order[k]] < 0) ? 0 : 1;
-    emit_bits_e(entropy, (unsigned int) temp, 1);
-
-    /* Emit buffered correction bits that must be associated with this code */
-    emit_buffered_bits(entropy, BR_buffer, BR);
-    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-    BR = 0;
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    entropy->BE += BR;		/* concat my correction bits to older ones */
-    /* We force out the EOB if we risk either:
-     * 1. overflow of the EOB counter;
-     * 2. overflow of the correction bit buffer during the next MCU.
-     */
-    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
-      emit_eobrun(entropy);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/* Encode a single block's worth of coefficients */
-
-LOCAL(boolean)
-encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
-		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
-{
-  register int temp, temp2;
-  register int nbits;
-  register int r, k;
-  int Se = state->cinfo->lim_Se;
-  int max_coef_bits = state->cinfo->data_precision + 3;
-  const int * natural_order = state->cinfo->natural_order;
-
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-
-  if ((temp = block[0] - last_dc_val) == 0) {
-    /* Emit the Huffman-coded symbol for the number of bits */
-    if (! emit_bits_s(state, dctbl->ehufco[0], dctbl->ehufsi[0]))
-      return FALSE;
-  } else {
-    if ((temp2 = temp) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative input, want temp2 = bitwise complement of abs(input) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Since we're encoding a difference, the range limit is twice as much.
-     */
-    if (nbits > max_coef_bits)
-      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-    /* Emit the Huffman-coded symbol for the number of bits */
-    if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
-      return FALSE;
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-      return FALSE;
-  }
-
-  /* Encode the AC coefficients per section F.1.2.2 */
-
-  r = 0;			/* r = run length of zeros */
-
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
-	return FALSE;
-      r -= 16;
-    }
-
-    if ((temp2 = temp) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Use ">=" instead of ">" so can use the
-     * same one larger limit from DC check here.
-     */
-    if (nbits >= max_coef_bits)
-      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-    /* Emit Huffman symbol for run length / number of bits */
-    temp = (r << 4) + nbits;
-    if (! emit_bits_s(state, actbl->ehufco[temp], actbl->ehufsi[temp]))
-      return FALSE;
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-      return FALSE;
-
-    r = 0;			/* reset zero run length */
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0]))
-      return FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of Huffman-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu_huff (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Load up working state */
-  state.next_output_byte = cinfo->dest->next_output_byte;
-  state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
-  state.cinfo = cinfo;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! emit_restart_s(&state, entropy->next_restart_num))
-	return FALSE;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    if (! encode_one_block(&state,
-			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
-			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
-			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
-      return FALSE;
-    /* Update last_dc_val */
-    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  /* Completed MCU, so update state */
-  cinfo->dest->next_output_byte = state.next_output_byte;
-  cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Finish up at the end of a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass_huff (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-
-  if (cinfo->progressive_mode) {
-    entropy->next_output_byte = cinfo->dest->next_output_byte;
-    entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-    /* Flush out any buffered data */
-    emit_eobrun(entropy);
-    flush_bits_e(entropy);
-
-    cinfo->dest->next_output_byte = entropy->next_output_byte;
-    cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-  } else {
-    /* Load up working state ... flush_bits needs it */
-    state.next_output_byte = cinfo->dest->next_output_byte;
-    state.free_in_buffer = cinfo->dest->free_in_buffer;
-    ASSIGN_STATE(state.cur, entropy->saved);
-    state.cinfo = cinfo;
-
-    /* Flush out the last data */
-    if (! flush_bits_s(&state))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-    /* Update state */
-    cinfo->dest->next_output_byte = state.next_output_byte;
-    cinfo->dest->free_in_buffer = state.free_in_buffer;
-    ASSIGN_STATE(entropy->saved, state.cur);
-  }
-}
-
-
-/*
- * Huffman coding optimization.
- *
- * We first scan the supplied data and count the number of uses of each symbol
- * that is to be Huffman-coded. (This process MUST agree with the code above.)
- * Then we build a Huffman coding tree for the observed counts.
- * Symbols which are not needed at all for the particular image are not
- * assigned any code, which saves space in the DHT marker as well as in
- * the compressed data.
- */
-
-
-/* Process a single block's worth of coefficients */
-
-LOCAL(void)
-htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
-		 long dc_counts[], long ac_counts[])
-{
-  register int temp;
-  register int nbits;
-  register int r, k;
-  int Se = cinfo->lim_Se;
-  int max_coef_bits = cinfo->data_precision + 3;
-  const int * natural_order = cinfo->natural_order;
-
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-
-  if ((temp = block[0] - last_dc_val) == 0) {
-    /* Count the Huffman symbol for the number of bits */
-    dc_counts[0]++;
-  } else {
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Since we're encoding a difference, the range limit is twice as much.
-     */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count the Huffman symbol for the number of bits */
-    dc_counts[nbits]++;
-  }
-
-  /* Encode the AC coefficients per section F.1.2.2 */
-
-  r = 0;			/* r = run length of zeros */
-
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      ac_counts[0xF0]++;
-      r -= 16;
-    }
-
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Use ">=" instead of ">" so can use the
-     * same one larger limit from DC check here.
-     */
-    if (nbits >= max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count Huffman symbol for run length / number of bits */
-    ac_counts[(r << 4) + nbits]++;
-
-    r = 0;			/* reset zero run length */
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    ac_counts[0]++;
-}
-
-
-/*
- * Trial-encode one MCU's worth of Huffman-compressed coefficients.
- * No data is actually output, so no suspension return is possible.
- */
-
-METHODDEF(boolean)
-encode_mcu_gather (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Take care of restart intervals if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      /* Re-initialize DC predictions to 0 */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-	entropy->saved.last_dc_val[ci] = 0;
-      /* Update restart state */
-      entropy->restarts_to_go = cinfo->restart_interval;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
-		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
-		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
-    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Generate the best Huffman code table for the given counts, fill htbl.
- *
- * The JPEG standard requires that no symbol be assigned a codeword of all
- * one bits (so that padding bits added at the end of a compressed segment
- * can't look like a valid code).  Because of the canonical ordering of
- * codewords, this just means that there must be an unused slot in the
- * longest codeword length category.  Section K.2 of the JPEG spec suggests
- * reserving such a slot by pretending that symbol 256 is a valid symbol
- * with count 1.  In theory that's not optimal; giving it count zero but
- * including it in the symbol set anyway should give a better Huffman code.
- * But the theoretically better code actually seems to come out worse in
- * practice, because it produces more all-ones bytes (which incur stuffed
- * zero bytes in the final file).  In any case the difference is tiny.
- *
- * The JPEG standard requires Huffman codes to be no more than 16 bits long.
- * If some symbols have a very small but nonzero probability, the Huffman tree
- * must be adjusted to meet the code length restriction.  We currently use
- * the adjustment method suggested in JPEG section K.2.  This method is *not*
- * optimal; it may not choose the best possible limited-length code.  But
- * typically only very-low-frequency symbols will be given less-than-optimal
- * lengths, so the code is almost optimal.  Experimental comparisons against
- * an optimal limited-length-code algorithm indicate that the difference is
- * microscopic --- usually less than a hundredth of a percent of total size.
- * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
- */
-
-LOCAL(void)
-jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
-{
-#define MAX_CLEN 32		/* assumed maximum initial code length */
-  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
-  int codesize[257];		/* codesize[k] = code length of symbol k */
-  int others[257];		/* next symbol in current branch of tree */
-  int c1, c2, i, j;
-  UINT8 *p;
-  long v;
-
-  freq[256] = 1;		/* make sure 256 has a nonzero count */
-  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
-   * that no real symbol is given code-value of all ones, because 256
-   * will be placed last in the largest codeword category.
-   * In the symbol list build procedure this element serves as sentinel
-   * for the zero run loop.
-   */
-
-#ifndef DONT_USE_FANCY_HUFF_OPT
-
-  /* Build list of symbols sorted in order of descending frequency */
-  /* This approach has several benefits (thank to John Korejwa for the idea):
-   *     1.
-   * If a codelength category is split during the length limiting procedure
-   * below, the feature that more frequent symbols are assigned shorter
-   * codewords remains valid for the adjusted code.
-   *     2.
-   * To reduce consecutive ones in a Huffman data stream (thus reducing the
-   * number of stuff bytes in JPEG) it is preferable to follow 0 branches
-   * (and avoid 1 branches) as much as possible.  This is easily done by
-   * assigning symbols to leaves of the Huffman tree in order of decreasing
-   * frequency, with no secondary sort based on codelengths.
-   *     3.
-   * The symbol list can be built independently from the assignment of code
-   * lengths by the Huffman procedure below.
-   * Note: The symbol list build procedure must be performed first, because
-   * the Huffman procedure assigning the codelengths clobbers the frequency
-   * counts!
-   */
-
-  /* Here we use the others array as a linked list of nonzero frequencies
-   * to be sorted.  Already sorted elements are removed from the list.
-   */
-
-  /* Building list */
-
-  /* This item does not correspond to a valid symbol frequency and is used
-   * as starting index.
-   */
-  j = 256;
-
-  for (i = 0;; i++) {
-    if (freq[i] == 0)		/* skip zero frequencies */
-      continue;
-    if (i > 255)
-      break;
-    others[j] = i;		/* this symbol value */
-    j = i;			/* previous symbol value */
-  }
-  others[j] = -1;		/* mark end of list */
-
-  /* Sorting list */
-
-  p = htbl->huffval;
-  while ((c1 = others[256]) >= 0) {
-    v = freq[c1];
-    i = c1;			/* first symbol value */
-    j = 256;			/* pseudo symbol value for starting index */
-    while ((c2 = others[c1]) >= 0) {
-      if (freq[c2] > v) {
-	v = freq[c2];
-	i = c2;			/* this symbol value */
-	j = c1;			/* previous symbol value */
-      }
-      c1 = c2;
-    }
-    others[j] = others[i];	/* remove this symbol i from list */
-    *p++ = (UINT8) i;
-  }
-
-#endif /* DONT_USE_FANCY_HUFF_OPT */
-
-  /* This algorithm is explained in section K.2 of the JPEG standard */
-
-  MEMZERO(bits, SIZEOF(bits));
-  MEMZERO(codesize, SIZEOF(codesize));
-  for (i = 0; i < 257; i++)
-    others[i] = -1;		/* init links to empty */
-
-  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
-
-  for (;;) {
-    /* Find the smallest nonzero frequency, set c1 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c1 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v) {
-	v = freq[i];
-	c1 = i;
-      }
-    }
-
-    /* Find the next smallest nonzero frequency, set c2 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c2 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v && i != c1) {
-	v = freq[i];
-	c2 = i;
-      }
-    }
-
-    /* Done if we've merged everything into one frequency */
-    if (c2 < 0)
-      break;
-
-    /* Else merge the two counts/trees */
-    freq[c1] += freq[c2];
-    freq[c2] = 0;
-
-    /* Increment the codesize of everything in c1's tree branch */
-    codesize[c1]++;
-    while (others[c1] >= 0) {
-      c1 = others[c1];
-      codesize[c1]++;
-    }
-
-    others[c1] = c2;		/* chain c2 onto c1's tree branch */
-
-    /* Increment the codesize of everything in c2's tree branch */
-    codesize[c2]++;
-    while (others[c2] >= 0) {
-      c2 = others[c2];
-      codesize[c2]++;
-    }
-  }
-
-  /* Now count the number of symbols of each code length */
-  for (i = 0; i <= 256; i++) {
-    if (codesize[i]) {
-      /* The JPEG standard seems to think that this can't happen, */
-      /* but I'm paranoid... */
-      if (codesize[i] > MAX_CLEN)
-	ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
-
-      bits[codesize[i]]++;
-    }
-  }
-
-  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
-   * Huffman procedure assigned any such lengths, we must adjust the coding.
-   * Here is what the JPEG spec says about how this next bit works:
-   * Since symbols are paired for the longest Huffman code, the symbols are
-   * removed from this length category two at a time.  The prefix for the pair
-   * (which is one bit shorter) is allocated to one of the pair; then,
-   * skipping the BITS entry for that prefix length, a code word from the next
-   * shortest nonzero BITS entry is converted into a prefix for two code words
-   * one bit longer.
-   */
-
-  for (i = MAX_CLEN; i > 16; i--) {
-    while (bits[i] > 0) {
-      j = i - 2;		/* find length of new prefix to be used */
-      while (bits[j] == 0) {
-	if (j == 0)
-	  ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
-	j--;
-      }
-
-      bits[i] -= 2;		/* remove two symbols */
-      bits[i-1]++;		/* one goes in this length */
-      bits[j+1] += 2;		/* two new symbols in this length */
-      bits[j]--;		/* symbol of this length is now a prefix */
-    }
-  }
-
-  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
-  while (bits[i] == 0)		/* find largest codelength still in use */
-    i--;
-  bits[i]--;
-
-  /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-
-#ifdef DONT_USE_FANCY_HUFF_OPT
-
-  /* Return a list of the symbols sorted by code length */
-  /* Note: Due to the codelength changes made above, it can happen
-   * that more frequent symbols are assigned longer codewords.
-   */
-  p = htbl->huffval;
-  for (i = 1; i <= MAX_CLEN; i++) {
-    for (j = 0; j <= 255; j++) {
-      if (codesize[j] == i) {
-	*p++ = (UINT8) j;
-      }
-    }
-  }
-
-#endif /* DONT_USE_FANCY_HUFF_OPT */
-
-  /* Set sent_table FALSE so updated table will be written to JPEG file. */
-  htbl->sent_table = FALSE;
-}
-
-
-/*
- * Finish up a statistics-gathering pass and create the new Huffman tables.
- */
-
-METHODDEF(void)
-finish_pass_gather (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-  JHUFF_TBL **htblptr;
-  boolean did_dc[NUM_HUFF_TBLS];
-  boolean did_ac[NUM_HUFF_TBLS];
-
-  if (cinfo->progressive_mode)
-    /* Flush out buffered data (all we care about is counting the EOB symbol) */
-    emit_eobrun(entropy);
-
-  /* It's important not to apply jpeg_gen_optimal_table more than once
-   * per table, because it clobbers the input frequency counts!
-   */
-  MEMZERO(did_dc, SIZEOF(did_dc));
-  MEMZERO(did_ac, SIZEOF(did_ac));
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (! did_dc[tbl]) {
-	htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]);
-	did_dc[tbl] = TRUE;
-      }
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (! did_ac[tbl]) {
-	htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]);
-	did_ac[tbl] = TRUE;
-      }
-    }
-  }
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- * If gather_statistics is TRUE, we do not output anything during the scan,
- * just count the Huffman symbols used and generate Huffman code tables.
- */
-
-METHODDEF(void)
-start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    entropy->pub.finish_pass = finish_pass_gather;
-  else
-    entropy->pub.finish_pass = finish_pass_huff;
-
-  if (cinfo->progressive_mode) {
-    entropy->cinfo = cinfo;
-    entropy->gather_statistics = gather_statistics;
-
-    /* We assume jcmaster.c already validated the scan parameters. */
-
-    /* Select execution routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else {
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-	/* AC refinement needs a correction bit buffer */
-	if (entropy->bit_buffer == NULL)
-	  entropy->bit_buffer = (char *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, MAX_CORR_BITS * SIZEOF(char));
-      }
-    }
-
-    /* Initialize AC stuff */
-    entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no;
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  } else {
-    if (gather_statistics)
-      entropy->pub.encode_mcu = encode_mcu_gather;
-    else
-      entropy->pub.encode_mcu = encode_mcu_huff;
-  }
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (gather_statistics) {
-	/* Check for invalid table index */
-	/* (make_c_derived_tbl does this in the other path) */
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	/* Allocate and zero the statistics tables */
-	/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
-	if (entropy->dc_count_ptrs[tbl] == NULL)
-	  entropy->dc_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
-	MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	/* Compute derived values for Huffman tables */
-	/* We may do this more than once for a table, but it's not expensive */
-	jpeg_make_c_derived_tbl(cinfo, TRUE, tbl,
-				& entropy->dc_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (gather_statistics) {
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	if (entropy->ac_count_ptrs[tbl] == NULL)
-	  entropy->ac_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
-	MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	jpeg_make_c_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-    }
-  }
-
-  /* Initialize bit buffer to empty */
-  entropy->saved.put_buffer = 0;
-  entropy->saved.put_bits = 0;
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy encoding.
- */
-
-GLOBAL(void)
-jinit_huff_encoder (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_encoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass_huff;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
-  }
-
-  if (cinfo->progressive_mode)
-    entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
-}
diff --git a/dep/libjpeg/src/jcinit.c b/dep/libjpeg/src/jcinit.c
deleted file mode 100644
index 2aea7ca2a..000000000
--- a/dep/libjpeg/src/jcinit.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * jcinit.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains initialization logic for the JPEG compressor.
- * This routine is in charge of selecting the modules to be executed and
- * making an initialization call to each one.
- *
- * Logically, this code belongs in jcmaster.c.  It's split out because
- * linking this routine implies linking the entire compression library.
- * For a transcoding-only application, we want to be able to use jcmaster.c
- * without linking in the whole library.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Compute JPEG image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  /* Sanity check on input image dimensions to prevent overflow in
-   * following calculations.
-   * We do check jpeg_width and jpeg_height in initial_setup in jcmaster.c,
-   * but image_width and image_height can come from arbitrary data,
-   * and we need some space for multiplication by block_size.
-   */
-  if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-#ifdef DCT_SCALING_SUPPORTED
-
-  /* Compute actual JPEG image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/1 scaling */
-    cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
-    cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/2 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/3 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/4 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/5 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/6 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/7 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/8 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/9 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/10 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/11 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/12 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/13 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/14 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/15 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide block_size/16 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-#else /* !DCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->jpeg_width = cinfo->image_width;
-  cinfo->jpeg_height = cinfo->image_height;
-  cinfo->min_DCT_h_scaled_size = DCTSIZE;
-  cinfo->min_DCT_v_scaled_size = DCTSIZE;
-
-#endif /* DCT_SCALING_SUPPORTED */
-}
-
-
-/*
- * Master selection of compression modules.
- * This is done once at the start of processing an image.  We determine
- * which modules will be used and give them appropriate initialization calls.
- */
-
-GLOBAL(void)
-jinit_compress_master (j_compress_ptr cinfo)
-{
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Sanity check on input image dimensions */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
-      cinfo->input_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Width of an input scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* Compute JPEG image dimensions and related values. */
-  jpeg_calc_jpeg_dimensions(cinfo);
-
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, FALSE /* full compression */);
-
-  /* Preprocessing */
-  if (! cinfo->raw_data_in) {
-    jinit_color_converter(cinfo);
-    jinit_downsampler(cinfo);
-    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
-  }
-  /* Forward DCT */
-  jinit_forward_dct(cinfo);
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* Need a full-image coefficient buffer in any multi-pass mode. */
-  jinit_c_coef_controller(cinfo,
-		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
-  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
diff --git a/dep/libjpeg/src/jcmainct.c b/dep/libjpeg/src/jcmainct.c
deleted file mode 100644
index 39b97902e..000000000
--- a/dep/libjpeg/src/jcmainct.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * jcmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for compression.
- * The main buffer lies between the pre-processor and the JPEG
- * compressor proper; it holds downsampled data in the JPEG colorspace.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Note: currently, there is no operating mode in which a full-image buffer
- * is needed at this step.  If there were, that mode could not be used with
- * "raw data" input, since this module is bypassed in that case.  However,
- * we've left the code here for possible use in special applications.
- */
-#undef FULL_MAIN_BUFFER_SUPPORTED
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_main_controller pub; /* public fields */
-
-  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
-  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
-  boolean suspended;		/* remember if we suspended output */
-  J_BUF_MODE pass_mode;		/* current operating mode */
-
-  /* If using just a strip buffer, this points to the entire set of buffers
-   * (we allocate one for each component).  In the full-image case, this
-   * points to the currently accessible strips of the virtual arrays.
-   */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  /* If using full-image storage, this array holds pointers to virtual-array
-   * control blocks for each component.  Unused if not full-image storage.
-   */
-  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
-#endif
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-METHODDEF(void) process_data_buffer_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Do nothing in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  mainp->cur_iMCU_row = 0;	/* initialize counters */
-  mainp->rowgroup_ctr = 0;
-  mainp->suspended = FALSE;
-  mainp->pass_mode = pass_mode;	/* save mode for use by process_data */
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    if (mainp->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-    mainp->pub.process_data = process_data_simple_main;
-    break;
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  case JBUF_SAVE_SOURCE:
-  case JBUF_CRANK_DEST:
-  case JBUF_SAVE_AND_PASS:
-    if (mainp->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    mainp->pub.process_data = process_data_buffer_main;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-}
-
-
-/*
- * Process some data.
- * This routine handles the simple pass-through mode,
- * where we have only a strip buffer.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Read input data if we haven't filled the main buffer yet */
-    if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					mainp->buffer, &mainp->rowgroup_ctr,
-					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
-
-    /* If we don't have a full iMCU row buffered, return to application for
-     * more data.  Note that preprocessor will always pad to fill the iMCU row
-     * at the bottom of the image.
-     */
-    if (mainp->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      return;
-
-    /* Send the completed row to the compressor */
-    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
-      /* If compressor did not consume the whole row, then we must need to
-       * suspend processing and return to the application.  In this situation
-       * we pretend we didn't yet consume the last input row; otherwise, if
-       * it happened to be the last row of the image, the application would
-       * think we were done.
-       */
-      if (! mainp->suspended) {
-	(*in_row_ctr)--;
-	mainp->suspended = TRUE;
-      }
-      return;
-    }
-    /* We did finish the row.  Undo our little suspension hack if a previous
-     * call suspended; then mark the main buffer empty.
-     */
-    if (mainp->suspended) {
-      (*in_row_ctr)++;
-      mainp->suspended = FALSE;
-    }
-    mainp->rowgroup_ctr = 0;
-    mainp->cur_iMCU_row++;
-  }
-}
-
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-
-/*
- * Process some data.
- * This routine handles all of the modes that use a full-size buffer.
- */
-
-METHODDEF(void)
-process_data_buffer_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci;
-  jpeg_component_info *compptr;
-  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
-
-  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Realign the virtual buffers if at the start of an iMCU row. */
-    if (mainp->rowgroup_ctr == 0) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	mainp->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, mainp->whole_image[ci], mainp->cur_iMCU_row *
-	   ((JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size)),
-	   (JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size),
-	   writing);
-      }
-      /* In a read pass, pretend we just read some source data. */
-      if (! writing) {
-	*in_row_ctr += (JDIMENSION)
-	  (cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size);
-	mainp->rowgroup_ctr = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
-      }
-    }
-
-    /* If a write pass, read input data until the current iMCU row is full. */
-    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
-    if (writing) {
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					mainp->buffer, &mainp->rowgroup_ctr,
-					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
-      /* Return to application if we need more data to fill the iMCU row. */
-      if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-	return;
-    }
-
-    /* Emit data, unless this is a sink-only pass. */
-    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
-      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
-	/* If compressor did not consume the whole row, then we must need to
-	 * suspend processing and return to the application.  In this situation
-	 * we pretend we didn't yet consume the last input row; otherwise, if
-	 * it happened to be the last row of the image, the application would
-	 * think we were done.
-	 */
-	if (! mainp->suspended) {
-	  (*in_row_ctr)--;
-	  mainp->suspended = TRUE;
-	}
-	return;
-      }
-      /* We did finish the row.  Undo our little suspension hack if a previous
-       * call suspended; then mark the main buffer empty.
-       */
-      if (mainp->suspended) {
-	(*in_row_ctr)++;
-	mainp->suspended = FALSE;
-      }
-    }
-
-    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
-    mainp->rowgroup_ctr = 0;
-    mainp->cur_iMCU_row++;
-  }
-}
-
-#endif /* FULL_MAIN_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr mainp;
-  int ci;
-  jpeg_component_info *compptr;
-
-  mainp = (my_main_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = &mainp->pub;
-  mainp->pub.start_pass = start_pass_main;
-
-  /* We don't need to create a buffer in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  /* Create the buffer.  It holds downsampled data, so each component
-   * may be of a different size.
-   */
-  if (need_full_buffer) {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component */
-    /* Note we pad the bottom to a multiple of the iMCU height */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      mainp->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-	 ((JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				 (long) compptr->v_samp_factor)) *
-	 ((JDIMENSION) cinfo->min_DCT_v_scaled_size),
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    mainp->whole_image[0] = NULL; /* flag for no virtual arrays */
-#endif
-    /* Allocate a strip buffer for each component */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-  }
-}
diff --git a/dep/libjpeg/src/jcmarker.c b/dep/libjpeg/src/jcmarker.c
deleted file mode 100644
index 8874cd867..000000000
--- a/dep/libjpeg/src/jcmarker.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/*
- * jcmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to write JPEG datastream markers.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-
-  M_DHT   = 0xc4,
-
-  M_DAC   = 0xcc,
-
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-
-  M_JPG0  = 0xf0,
-  M_JPG8  = 0xf8,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-
-  M_TEM   = 0x01,
-
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_writer pub; /* public fields */
-
-  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
-} my_marker_writer;
-
-typedef my_marker_writer * my_marker_ptr;
-
-
-/*
- * Basic output routines.
- *
- * Note that we do not support suspension while writing a marker.
- * Therefore, an application using suspension must ensure that there is
- * enough buffer space for the initial markers (typ. 600-700 bytes) before
- * calling jpeg_start_compress, and enough space to write the trailing EOI
- * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
- * modes are not supported at all with suspension, so those two are the only
- * points where markers will be written.
- */
-
-LOCAL(void)
-emit_byte (j_compress_ptr cinfo, int val)
-/* Emit a byte */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *(dest->next_output_byte)++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0) {
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  }
-}
-
-
-LOCAL(void)
-emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
-/* Emit a marker code */
-{
-  emit_byte(cinfo, 0xFF);
-  emit_byte(cinfo, (int) mark);
-}
-
-
-LOCAL(void)
-emit_2bytes (j_compress_ptr cinfo, int value)
-/* Emit a 2-byte integer; these are always MSB first in JPEG files */
-{
-  emit_byte(cinfo, (value >> 8) & 0xFF);
-  emit_byte(cinfo, value & 0xFF);
-}
-
-
-/*
- * Routines to write specific marker types.
- */
-
-LOCAL(int)
-emit_dqt (j_compress_ptr cinfo, int index)
-/* Emit a DQT marker */
-/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
-{
-  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
-  int prec;
-  int i;
-
-  if (qtbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
-
-  prec = 0;
-  for (i = 0; i <= cinfo->lim_Se; i++) {
-    if (qtbl->quantval[cinfo->natural_order[i]] > 255)
-      prec = 1;
-  }
-
-  if (! qtbl->sent_table) {
-    emit_marker(cinfo, M_DQT);
-
-    emit_2bytes(cinfo,
-      prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2);
-
-    emit_byte(cinfo, index + (prec<<4));
-
-    for (i = 0; i <= cinfo->lim_Se; i++) {
-      /* The table entries must be emitted in zigzag order. */
-      unsigned int qval = qtbl->quantval[cinfo->natural_order[i]];
-      if (prec)
-	emit_byte(cinfo, (int) (qval >> 8));
-      emit_byte(cinfo, (int) (qval & 0xFF));
-    }
-
-    qtbl->sent_table = TRUE;
-  }
-
-  return prec;
-}
-
-
-LOCAL(void)
-emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
-/* Emit a DHT marker */
-{
-  JHUFF_TBL * htbl;
-  int length, i;
-  
-  if (is_ac) {
-    htbl = cinfo->ac_huff_tbl_ptrs[index];
-    index += 0x10;		/* output index has AC bit set */
-  } else {
-    htbl = cinfo->dc_huff_tbl_ptrs[index];
-  }
-
-  if (htbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
-  
-  if (! htbl->sent_table) {
-    emit_marker(cinfo, M_DHT);
-    
-    length = 0;
-    for (i = 1; i <= 16; i++)
-      length += htbl->bits[i];
-    
-    emit_2bytes(cinfo, length + 2 + 1 + 16);
-    emit_byte(cinfo, index);
-    
-    for (i = 1; i <= 16; i++)
-      emit_byte(cinfo, htbl->bits[i]);
-    
-    for (i = 0; i < length; i++)
-      emit_byte(cinfo, htbl->huffval[i]);
-    
-    htbl->sent_table = TRUE;
-  }
-}
-
-
-LOCAL(void)
-emit_dac (j_compress_ptr cinfo)
-/* Emit a DAC marker */
-/* Since the useful info is so small, we want to emit all the tables in */
-/* one DAC marker.  Therefore this routine does its own scan of the table. */
-{
-#ifdef C_ARITH_CODING_SUPPORTED
-  char dc_in_use[NUM_ARITH_TBLS];
-  char ac_in_use[NUM_ARITH_TBLS];
-  int length, i;
-  jpeg_component_info *compptr;
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    dc_in_use[i] = ac_in_use[i] = 0;
-
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0)
-      dc_in_use[compptr->dc_tbl_no] = 1;
-    /* AC needs no table when not present */
-    if (cinfo->Se)
-      ac_in_use[compptr->ac_tbl_no] = 1;
-  }
-
-  length = 0;
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    length += dc_in_use[i] + ac_in_use[i];
-
-  if (length) {
-    emit_marker(cinfo, M_DAC);
-
-    emit_2bytes(cinfo, length*2 + 2);
-
-    for (i = 0; i < NUM_ARITH_TBLS; i++) {
-      if (dc_in_use[i]) {
-	emit_byte(cinfo, i);
-	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
-      }
-      if (ac_in_use[i]) {
-	emit_byte(cinfo, i + 0x10);
-	emit_byte(cinfo, cinfo->arith_ac_K[i]);
-      }
-    }
-  }
-#endif /* C_ARITH_CODING_SUPPORTED */
-}
-
-
-LOCAL(void)
-emit_dri (j_compress_ptr cinfo)
-/* Emit a DRI marker */
-{
-  emit_marker(cinfo, M_DRI);
-  
-  emit_2bytes(cinfo, 4);	/* fixed length */
-
-  emit_2bytes(cinfo, (int) cinfo->restart_interval);
-}
-
-
-LOCAL(void)
-emit_lse_ict (j_compress_ptr cinfo)
-/* Emit an LSE inverse color transform specification marker */
-{
-  /* Support only 1 transform */
-  if (cinfo->color_transform != JCT_SUBTRACT_GREEN ||
-      cinfo->num_components < 3)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  emit_marker(cinfo, M_JPG8);
-  
-  emit_2bytes(cinfo, 24);	/* fixed length */
-
-  emit_byte(cinfo, 0x0D);	/* ID inverse transform specification */
-  emit_2bytes(cinfo, MAXJSAMPLE);	/* MAXTRANS */
-  emit_byte(cinfo, 3);		/* Nt=3 */
-  emit_byte(cinfo, cinfo->comp_info[1].component_id);
-  emit_byte(cinfo, cinfo->comp_info[0].component_id);
-  emit_byte(cinfo, cinfo->comp_info[2].component_id);
-  emit_byte(cinfo, 0x80);	/* F1: CENTER1=1, NORM1=0 */
-  emit_2bytes(cinfo, 0);	/* A(1,1)=0 */
-  emit_2bytes(cinfo, 0);	/* A(1,2)=0 */
-  emit_byte(cinfo, 0);		/* F2: CENTER2=0, NORM2=0 */
-  emit_2bytes(cinfo, 1);	/* A(2,1)=1 */
-  emit_2bytes(cinfo, 0);	/* A(2,2)=0 */
-  emit_byte(cinfo, 0);		/* F3: CENTER3=0, NORM3=0 */
-  emit_2bytes(cinfo, 1);	/* A(3,1)=1 */
-  emit_2bytes(cinfo, 0);	/* A(3,2)=0 */
-}
-
-
-LOCAL(void)
-emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
-/* Emit a SOF marker */
-{
-  int ci;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, code);
-  
-  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
-
-  /* Make sure image isn't bigger than SOF field can handle */
-  if ((long) cinfo->jpeg_height > 65535L ||
-      (long) cinfo->jpeg_width > 65535L)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
-
-  emit_byte(cinfo, cinfo->data_precision);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_height);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_width);
-
-  emit_byte(cinfo, cinfo->num_components);
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    emit_byte(cinfo, compptr->component_id);
-    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
-    emit_byte(cinfo, compptr->quant_tbl_no);
-  }
-}
-
-
-LOCAL(void)
-emit_sos (j_compress_ptr cinfo)
-/* Emit a SOS marker */
-{
-  int i, td, ta;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, cinfo->comps_in_scan);
-  
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    emit_byte(cinfo, compptr->component_id);
-
-    /* We emit 0 for unused field(s); this is recommended by the P&M text
-     * but does not seem to be specified in the standard.
-     */
-
-    /* DC needs no table for refinement scan */
-    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
-    /* AC needs no table when not present */
-    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
-
-    emit_byte(cinfo, (td << 4) + ta);
-  }
-
-  emit_byte(cinfo, cinfo->Ss);
-  emit_byte(cinfo, cinfo->Se);
-  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
-}
-
-
-LOCAL(void)
-emit_pseudo_sos (j_compress_ptr cinfo)
-/* Emit a pseudo SOS marker */
-{
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, 0); /* Ns */
-
-  emit_byte(cinfo, 0); /* Ss */
-  emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */
-  emit_byte(cinfo, 0); /* Ah/Al */
-}
-
-
-LOCAL(void)
-emit_jfif_app0 (j_compress_ptr cinfo)
-/* Emit a JFIF-compliant APP0 marker */
-{
-  /*
-   * Length of APP0 block	(2 bytes)
-   * Block ID			(4 bytes - ASCII "JFIF")
-   * Zero byte			(1 byte to terminate the ID string)
-   * Version Major, Minor	(2 bytes - major first)
-   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
-   * Xdpu			(2 bytes - dots per unit horizontal)
-   * Ydpu			(2 bytes - dots per unit vertical)
-   * Thumbnail X size		(1 byte)
-   * Thumbnail Y size		(1 byte)
-   */
-  
-  emit_marker(cinfo, M_APP0);
-  
-  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
-
-  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0x49);
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0);
-  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
-  emit_byte(cinfo, cinfo->JFIF_minor_version);
-  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
-  emit_2bytes(cinfo, (int) cinfo->X_density);
-  emit_2bytes(cinfo, (int) cinfo->Y_density);
-  emit_byte(cinfo, 0);		/* No thumbnail image */
-  emit_byte(cinfo, 0);
-}
-
-
-LOCAL(void)
-emit_adobe_app14 (j_compress_ptr cinfo)
-/* Emit an Adobe APP14 marker */
-{
-  /*
-   * Length of APP14 block	(2 bytes)
-   * Block ID			(5 bytes - ASCII "Adobe")
-   * Version Number		(2 bytes - currently 100)
-   * Flags0			(2 bytes - currently 0)
-   * Flags1			(2 bytes - currently 0)
-   * Color transform		(1 byte)
-   *
-   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
-   * now in circulation seem to use Version = 100, so that's what we write.
-   *
-   * We write the color transform byte as 1 if the JPEG color space is
-   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
-   * whether the encoder performed a transformation, which is pretty useless.
-   */
-  
-  emit_marker(cinfo, M_APP14);
-  
-  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
-
-  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
-  emit_byte(cinfo, 0x64);
-  emit_byte(cinfo, 0x6F);
-  emit_byte(cinfo, 0x62);
-  emit_byte(cinfo, 0x65);
-  emit_2bytes(cinfo, 100);	/* Version */
-  emit_2bytes(cinfo, 0);	/* Flags0 */
-  emit_2bytes(cinfo, 0);	/* Flags1 */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_YCbCr:
-    emit_byte(cinfo, 1);	/* Color transform = 1 */
-    break;
-  case JCS_YCCK:
-    emit_byte(cinfo, 2);	/* Color transform = 2 */
-    break;
-  default:
-    emit_byte(cinfo, 0);	/* Color transform = 0 */
-  }
-}
-
-
-/*
- * These routines allow writing an arbitrary marker with parameters.
- * The only intended use is to emit COM or APPn markers after calling
- * write_file_header and before calling write_frame_header.
- * Other uses are not guaranteed to produce desirable results.
- * Counting the parameter bytes properly is the caller's responsibility.
- */
-
-METHODDEF(void)
-write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-/* Emit an arbitrary marker header */
-{
-  if (datalen > (unsigned int) 65533)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  emit_marker(cinfo, (JPEG_MARKER) marker);
-
-  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
-}
-
-METHODDEF(void)
-write_marker_byte (j_compress_ptr cinfo, int val)
-/* Emit one byte of marker parameters following write_marker_header */
-{
-  emit_byte(cinfo, val);
-}
-
-
-/*
- * Write datastream header.
- * This consists of an SOI and optional APPn markers.
- * We recommend use of the JFIF marker, but not the Adobe marker,
- * when using YCbCr or grayscale data.  The JFIF marker is also used
- * for other standard JPEG colorspaces.  The Adobe marker is helpful
- * to distinguish RGB, CMYK, and YCCK colorspaces.
- * Note that an application can write additional header markers after
- * jpeg_start_compress returns.
- */
-
-METHODDEF(void)
-write_file_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  emit_marker(cinfo, M_SOI);	/* first the SOI */
-
-  /* SOI is defined to reset restart interval to 0 */
-  marker->last_restart_interval = 0;
-
-  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
-    emit_jfif_app0(cinfo);
-  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
-    emit_adobe_app14(cinfo);
-}
-
-
-/*
- * Write frame header.
- * This consists of DQT and SOFn markers,
- * a conditional LSE marker and a conditional pseudo SOS marker.
- * Note that we do not emit the SOF until we have emitted the DQT(s).
- * This avoids compatibility problems with incorrect implementations that
- * try to error-check the quant table numbers as soon as they see the SOF.
- */
-
-METHODDEF(void)
-write_frame_header (j_compress_ptr cinfo)
-{
-  int ci, prec;
-  boolean is_baseline;
-  jpeg_component_info *compptr;
-  
-  /* Emit DQT for each quantization table.
-   * Note that emit_dqt() suppresses any duplicate tables.
-   */
-  prec = 0;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
-  }
-  /* now prec is nonzero iff there are any 16-bit quant tables. */
-
-  /* Check for a non-baseline specification.
-   * Note we assume that Huffman table numbers won't be changed later.
-   */
-  if (cinfo->arith_code || cinfo->progressive_mode ||
-      cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) {
-    is_baseline = FALSE;
-  } else {
-    is_baseline = TRUE;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
-	is_baseline = FALSE;
-    }
-    if (prec && is_baseline) {
-      is_baseline = FALSE;
-      /* If it's baseline except for quantizer size, warn the user */
-      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
-    }
-  }
-
-  /* Emit the proper SOF marker */
-  if (cinfo->arith_code) {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
-    else
-      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
-  } else {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
-    else if (is_baseline)
-      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
-    else
-      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
-  }
-
-  /* Check to emit LSE inverse color transform specification marker */
-  if (cinfo->color_transform)
-    emit_lse_ict(cinfo);
-
-  /* Check to emit pseudo SOS marker */
-  if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE)
-    emit_pseudo_sos(cinfo);
-}
-
-
-/*
- * Write scan header.
- * This consists of DHT or DAC markers, optional DRI, and SOS.
- * Compressed data will be written following the SOS.
- */
-
-METHODDEF(void)
-write_scan_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  int i;
-  jpeg_component_info *compptr;
-
-  if (cinfo->arith_code) {
-    /* Emit arith conditioning info.  We may have some duplication
-     * if the file has multiple scans, but it's so small it's hardly
-     * worth worrying about.
-     */
-    emit_dac(cinfo);
-  } else {
-    /* Emit Huffman tables.
-     * Note that emit_dht() suppresses any duplicate tables.
-     */
-    for (i = 0; i < cinfo->comps_in_scan; i++) {
-      compptr = cinfo->cur_comp_info[i];
-      /* DC needs no table for refinement scan */
-      if (cinfo->Ss == 0 && cinfo->Ah == 0)
-	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-      /* AC needs no table when not present */
-      if (cinfo->Se)
-	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
-    }
-  }
-
-  /* Emit DRI if required --- note that DRI value could change for each scan.
-   * We avoid wasting space with unnecessary DRIs, however.
-   */
-  if (cinfo->restart_interval != marker->last_restart_interval) {
-    emit_dri(cinfo);
-    marker->last_restart_interval = cinfo->restart_interval;
-  }
-
-  emit_sos(cinfo);
-}
-
-
-/*
- * Write datastream trailer.
- */
-
-METHODDEF(void)
-write_file_trailer (j_compress_ptr cinfo)
-{
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Write an abbreviated table-specification datastream.
- * This consists of SOI, DQT and DHT tables, and EOI.
- * Any table that is defined and not marked sent_table = TRUE will be
- * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
- */
-
-METHODDEF(void)
-write_tables_only (j_compress_ptr cinfo)
-{
-  int i;
-
-  emit_marker(cinfo, M_SOI);
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if (cinfo->quant_tbl_ptrs[i] != NULL)
-      (void) emit_dqt(cinfo, i);
-  }
-
-  if (! cinfo->arith_code) {
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, FALSE);
-      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, TRUE);
-    }
-  }
-
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Initialize the marker writer module.
- */
-
-GLOBAL(void)
-jinit_marker_writer (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker;
-
-  /* Create the subobject */
-  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_marker_writer));
-  cinfo->marker = &marker->pub;
-  /* Initialize method pointers */
-  marker->pub.write_file_header = write_file_header;
-  marker->pub.write_frame_header = write_frame_header;
-  marker->pub.write_scan_header = write_scan_header;
-  marker->pub.write_file_trailer = write_file_trailer;
-  marker->pub.write_tables_only = write_tables_only;
-  marker->pub.write_marker_header = write_marker_header;
-  marker->pub.write_marker_byte = write_marker_byte;
-  /* Initialize private state */
-  marker->last_restart_interval = 0;
-}
diff --git a/dep/libjpeg/src/jcmaster.c b/dep/libjpeg/src/jcmaster.c
deleted file mode 100644
index a70af0c02..000000000
--- a/dep/libjpeg/src/jcmaster.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * jcmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG compressor.
- * These routines are concerned with parameter validation, initial setup,
- * and inter-pass control (determining the number of passes and the work 
- * to be done in each pass).
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef enum {
-	main_pass,		/* input data, also do first output step */
-	huff_opt_pass,		/* Huffman code optimization pass */
-	output_pass		/* data output pass */
-} c_pass_type;
-
-typedef struct {
-  struct jpeg_comp_master pub;	/* public fields */
-
-  c_pass_type pass_type;	/* the type of the current pass */
-
-  int pass_number;		/* # of passes completed */
-  int total_passes;		/* total # of passes needed */
-
-  int scan_number;		/* current index in scan_info[] */
-} my_comp_master;
-
-typedef my_comp_master * my_master_ptr;
-
-
-/*
- * Support routines that do various essential calculations.
- */
-
-LOCAL(void)
-initial_setup (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  int ci, ssize;
-  jpeg_component_info *compptr;
-
-  /* Sanity check on block_size */
-  if (cinfo->block_size < 1 || cinfo->block_size > 16)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
-
-  /* Derive natural_order from block_size */
-  switch (cinfo->block_size) {
-  case 2: cinfo->natural_order = jpeg_natural_order2; break;
-  case 3: cinfo->natural_order = jpeg_natural_order3; break;
-  case 4: cinfo->natural_order = jpeg_natural_order4; break;
-  case 5: cinfo->natural_order = jpeg_natural_order5; break;
-  case 6: cinfo->natural_order = jpeg_natural_order6; break;
-  case 7: cinfo->natural_order = jpeg_natural_order7; break;
-  default: cinfo->natural_order = jpeg_natural_order;
-  }
-
-  /* Derive lim_Se from block_size */
-  cinfo->lim_Se = cinfo->block_size < DCTSIZE ?
-    cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1;
-
-  /* Sanity check on image dimensions */
-  if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
-      cinfo->num_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
-  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Fill in the correct component_index value; don't rely on application */
-    compptr->component_index = ci;
-    /* In selecting the actual DCT scaling for each component, we try to
-     * scale down the chroma components via DCT scaling rather than downsampling.
-     * This saves time if the downsampler gets to use 1:1 scaling.
-     * Note this code adapts subsampling ratios which are powers of 2.
-     */
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    if (! cinfo->raw_data_in)
-      while (cinfo->min_DCT_h_scaled_size * ssize <=
-	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-#endif
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    if (! cinfo->raw_data_in)
-      while (cinfo->min_DCT_v_scaled_size * ssize <=
-	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-#endif
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support DCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Don't need quantization scale after DCT,
-     * until color conversion says otherwise.
-     */
-    compptr->component_needed = FALSE;
-  }
-
-  /* Compute number of fully interleaved MCU rows (number of times that
-   * main controller will call coefficient controller).
-   */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->jpeg_height,
-		  (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-}
-
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-
-LOCAL(void)
-validate_script (j_compress_ptr cinfo)
-/* Verify that the scan script in cinfo->scan_info[] is valid; also
- * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
- */
-{
-  const jpeg_scan_info * scanptr;
-  int scanno, ncomps, ci, coefi, thisi;
-  int Ss, Se, Ah, Al;
-  boolean component_sent[MAX_COMPONENTS];
-#ifdef C_PROGRESSIVE_SUPPORTED
-  int * last_bitpos_ptr;
-  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
-  /* -1 until that coefficient has been seen; then last Al for it */
-#endif
-
-  if (cinfo->num_scans <= 0)
-    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
-
-  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
-   * for progressive JPEG, no scan can have this.
-   */
-  scanptr = cinfo->scan_info;
-  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    cinfo->progressive_mode = TRUE;
-    last_bitpos_ptr = & last_bitpos[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (coefi = 0; coefi < DCTSIZE2; coefi++)
-	*last_bitpos_ptr++ = -1;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      component_sent[ci] = FALSE;
-  }
-
-  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
-    /* Validate component indexes */
-    ncomps = scanptr->comps_in_scan;
-    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
-    for (ci = 0; ci < ncomps; ci++) {
-      thisi = scanptr->component_index[ci];
-      if (thisi < 0 || thisi >= cinfo->num_components)
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-      /* Components must appear in SOF order within each scan */
-      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-    }
-    /* Validate progression parameters */
-    Ss = scanptr->Ss;
-    Se = scanptr->Se;
-    Ah = scanptr->Ah;
-    Al = scanptr->Al;
-    if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
-       * seems wrong: the upper bound ought to depend on data precision.
-       * Perhaps they really meant 0..N+1 for N-bit precision.
-       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
-       * out-of-range reconstructed DC values during the first DC scan,
-       * which might cause problems for some decoders.
-       */
-      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-	  Ah < 0 || Ah > (cinfo->data_precision > 8 ? 13 : 10) ||
-	  Al < 0 || Al > (cinfo->data_precision > 8 ? 13 : 10))
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      if (Ss == 0) {
-	if (Se != 0)		/* DC and AC together not OK */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      } else {
-	if (ncomps != 1)	/* AC scans must be for only one component */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      }
-      for (ci = 0; ci < ncomps; ci++) {
-	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
-	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	for (coefi = Ss; coefi <= Se; coefi++) {
-	  if (last_bitpos_ptr[coefi] < 0) {
-	    /* first scan of this coefficient */
-	    if (Ah != 0)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  } else {
-	    /* not first scan */
-	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  }
-	  last_bitpos_ptr[coefi] = Al;
-	}
-      }
-#endif
-    } else {
-      /* For sequential JPEG, all progression parameters must be these: */
-      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      /* Make sure components are not sent twice */
-      for (ci = 0; ci < ncomps; ci++) {
-	thisi = scanptr->component_index[ci];
-	if (component_sent[thisi])
-	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-	component_sent[thisi] = TRUE;
-      }
-    }
-  }
-
-  /* Now verify that everything got sent. */
-  if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    /* For progressive mode, we only check that at least some DC data
-     * got sent for each component; the spec does not require that all bits
-     * of all coefficients be transmitted.  Would it be wiser to enforce
-     * transmission of all coefficient bits??
-     */
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (last_bitpos[ci][0] < 0)
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-#endif
-  } else {
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (! component_sent[ci])
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-  }
-}
-
-
-LOCAL(void)
-reduce_script (j_compress_ptr cinfo)
-/* Adapt scan script for use with reduced block size;
- * assume that script has been validated before.
- */
-{
-  jpeg_scan_info * scanptr;
-  int idxout, idxin;
-
-  /* Circumvent const declaration for this function */
-  scanptr = (jpeg_scan_info *) cinfo->scan_info;
-  idxout = 0;
-
-  for (idxin = 0; idxin < cinfo->num_scans; idxin++) {
-    /* After skipping, idxout becomes smaller than idxin */
-    if (idxin != idxout)
-      /* Copy rest of data;
-       * note we stay in given chunk of allocated memory.
-       */
-      scanptr[idxout] = scanptr[idxin];
-    if (scanptr[idxout].Ss > cinfo->lim_Se)
-      /* Entire scan out of range - skip this entry */
-      continue;
-    if (scanptr[idxout].Se > cinfo->lim_Se)
-      /* Limit scan to end of block */
-      scanptr[idxout].Se = cinfo->lim_Se;
-    idxout++;
-  }
-
-  cinfo->num_scans = idxout;
-}
-
-#endif /* C_MULTISCAN_FILES_SUPPORTED */
-
-
-LOCAL(void)
-select_scan_parameters (j_compress_ptr cinfo)
-/* Set up the scan parameters for the current scan */
-{
-  int ci;
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-  if (cinfo->scan_info != NULL) {
-    /* Prepare for current scan --- the script is already validated */
-    my_master_ptr master = (my_master_ptr) cinfo->master;
-    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
-
-    cinfo->comps_in_scan = scanptr->comps_in_scan;
-    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
-      cinfo->cur_comp_info[ci] =
-	&cinfo->comp_info[scanptr->component_index[ci]];
-    }
-    if (cinfo->progressive_mode) {
-      cinfo->Ss = scanptr->Ss;
-      cinfo->Se = scanptr->Se;
-      cinfo->Ah = scanptr->Ah;
-      cinfo->Al = scanptr->Al;
-      return;
-    }
-  }
-  else
-#endif
-  {
-    /* Prepare for single sequential-JPEG scan containing all components */
-    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPS_IN_SCAN);
-    cinfo->comps_in_scan = cinfo->num_components;
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
-    }
-  }
-  cinfo->Ss = 0;
-  cinfo->Se = cinfo->block_size * cinfo->block_size - 1;
-  cinfo->Ah = 0;
-  cinfo->Al = 0;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_compress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-
-  if (cinfo->comps_in_scan == 1) {
-
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-
-  } else {
-
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
-
-    cinfo->blocks_in_MCU = 0;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-
-  }
-
-  /* Convert restart specified in rows to actual MCU count. */
-  /* Note that count must fit in 16 bits, so we provide limiting. */
-  if (cinfo->restart_in_rows > 0) {
-    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
-    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
-  }
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each pass.  We determine which modules
- * will be active during this pass and give them appropriate start_pass calls.
- * We also set is_last_pass to indicate whether any more passes will be
- * required.
- */
-
-METHODDEF(void)
-prepare_for_pass (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  switch (master->pass_type) {
-  case main_pass:
-    /* Initial pass: will collect input data, and do either Huffman
-     * optimization or data output for the first scan.
-     */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (! cinfo->raw_data_in) {
-      (*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->downsample->start_pass) (cinfo);
-      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-    (*cinfo->fdct->start_pass) (cinfo);
-    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
-    (*cinfo->coef->start_pass) (cinfo,
-				(master->total_passes > 1 ?
-				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    if (cinfo->optimize_coding) {
-      /* No immediate data output; postpone writing frame/scan headers */
-      master->pub.call_pass_startup = FALSE;
-    } else {
-      /* Will write frame/scan headers at first jpeg_write_scanlines call */
-      master->pub.call_pass_startup = TRUE;
-    }
-    break;
-#ifdef ENTROPY_OPT_SUPPORTED
-  case huff_opt_pass:
-    /* Do Huffman optimization for a scan after the first one. */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (cinfo->Ss != 0 || cinfo->Ah == 0) {
-      (*cinfo->entropy->start_pass) (cinfo, TRUE);
-      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-      master->pub.call_pass_startup = FALSE;
-      break;
-    }
-    /* Special case: Huffman DC refinement scans need no Huffman table
-     * and therefore we can skip the optimization pass for them.
-     */
-    master->pass_type = output_pass;
-    master->pass_number++;
-    /*FALLTHROUGH*/
-#endif
-  case output_pass:
-    /* Do a data-output pass. */
-    /* We need not repeat per-scan setup if prior optimization pass did it. */
-    if (! cinfo->optimize_coding) {
-      select_scan_parameters(cinfo);
-      per_scan_setup(cinfo);
-    }
-    (*cinfo->entropy->start_pass) (cinfo, FALSE);
-    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-    /* We emit frame/scan headers now */
-    if (master->scan_number == 0)
-      (*cinfo->marker->write_frame_header) (cinfo);
-    (*cinfo->marker->write_scan_header) (cinfo);
-    master->pub.call_pass_startup = FALSE;
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-  }
-
-  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->total_passes;
-  }
-}
-
-
-/*
- * Special start-of-pass hook.
- * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
- * In single-pass processing, we need this hook because we don't want to
- * write frame/scan headers during jpeg_start_compress; we want to let the
- * application write COM markers etc. between jpeg_start_compress and the
- * jpeg_write_scanlines loop.
- * In multi-pass processing, this routine is not used.
- */
-
-METHODDEF(void)
-pass_startup (j_compress_ptr cinfo)
-{
-  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
-
-  (*cinfo->marker->write_frame_header) (cinfo);
-  (*cinfo->marker->write_scan_header) (cinfo);
-}
-
-
-/*
- * Finish up at end of pass.
- */
-
-METHODDEF(void)
-finish_pass_master (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* The entropy coder always needs an end-of-pass call,
-   * either to analyze statistics or to flush its output buffer.
-   */
-  (*cinfo->entropy->finish_pass) (cinfo);
-
-  /* Update state for next pass */
-  switch (master->pass_type) {
-  case main_pass:
-    /* next pass is either output of scan 0 (after optimization)
-     * or output of scan 1 (if no optimization).
-     */
-    master->pass_type = output_pass;
-    if (! cinfo->optimize_coding)
-      master->scan_number++;
-    break;
-  case huff_opt_pass:
-    /* next pass is always output of current scan */
-    master->pass_type = output_pass;
-    break;
-  case output_pass:
-    /* next pass is either optimization or output of next scan */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    master->scan_number++;
-    break;
-  }
-
-  master->pass_number++;
-}
-
-
-/*
- * Initialize master compression control.
- */
-
-GLOBAL(void)
-jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_comp_master));
-  cinfo->master = &master->pub;
-  master->pub.prepare_for_pass = prepare_for_pass;
-  master->pub.pass_startup = pass_startup;
-  master->pub.finish_pass = finish_pass_master;
-  master->pub.is_last_pass = FALSE;
-
-  /* Validate parameters, determine derived values */
-  initial_setup(cinfo);
-
-  if (cinfo->scan_info != NULL) {
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-    validate_script(cinfo);
-    if (cinfo->block_size < DCTSIZE)
-      reduce_script(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    cinfo->num_scans = 1;
-  }
-
-  if (cinfo->optimize_coding)
-    cinfo->arith_code = FALSE; /* disable arithmetic coding */
-  else if (! cinfo->arith_code &&
-	   (cinfo->progressive_mode ||
-	    (cinfo->block_size > 1 && cinfo->block_size < DCTSIZE)))
-    /* TEMPORARY HACK ??? */
-    /* assume default tables no good for progressive or reduced AC mode */
-    cinfo->optimize_coding = TRUE; /* force Huffman optimization */
-
-  /* Initialize my private state */
-  if (transcode_only) {
-    /* no main pass in transcoding */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    else
-      master->pass_type = output_pass;
-  } else {
-    /* for normal compression, first pass is always this type: */
-    master->pass_type = main_pass;
-  }
-  master->scan_number = 0;
-  master->pass_number = 0;
-  if (cinfo->optimize_coding)
-    master->total_passes = cinfo->num_scans * 2;
-  else
-    master->total_passes = cinfo->num_scans;
-}
diff --git a/dep/libjpeg/src/jcomapi.c b/dep/libjpeg/src/jcomapi.c
deleted file mode 100644
index 678c5d12d..000000000
--- a/dep/libjpeg/src/jcomapi.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * jcomapi.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface routines that are used for both
- * compression and decompression.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Abort processing of a JPEG compression or decompression operation,
- * but don't destroy the object itself.
- *
- * For this, we merely clean up all the nonpermanent memory pools.
- * Note that temp files (virtual arrays) are not allowed to belong to
- * the permanent pool, so we will be able to close all temp files here.
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_abort (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
-  if (cinfo->mem == NULL)
-    return;
-
-  /* Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
-    (*cinfo->mem->free_pool) (cinfo, pool);
-  }
-
-  /* Reset overall state for possible reuse of object */
-  if (cinfo->is_decompressor) {
-    cinfo->global_state = DSTATE_START;
-    /* Try to keep application from accessing now-deleted marker list.
-     * A bit kludgy to do it here, but this is the most central place.
-     */
-    ((j_decompress_ptr) cinfo)->marker_list = NULL;
-  } else {
-    cinfo->global_state = CSTATE_START;
-  }
-}
-
-
-/*
- * Destruction of a JPEG object.
- *
- * Everything gets deallocated except the master jpeg_compress_struct itself
- * and the error manager struct.  Both of these are supplied by the application
- * and must be freed, if necessary, by the application.  (Often they are on
- * the stack and so don't need to be freed anyway.)
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_destroy (j_common_ptr cinfo)
-{
-  /* We need only tell the memory manager to release everything. */
-  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
-  if (cinfo->mem != NULL)
-    (*cinfo->mem->self_destruct) (cinfo);
-  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
-  cinfo->global_state = 0;	/* mark it destroyed */
-}
-
-
-/*
- * Convenience routines for allocating quantization and Huffman tables.
- * (Would jutils.c be a more reasonable place to put these?)
- */
-
-GLOBAL(JQUANT_TBL *)
-jpeg_alloc_quant_table (j_common_ptr cinfo)
-{
-  JQUANT_TBL *tbl;
-
-  tbl = (JQUANT_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
-
-
-GLOBAL(JHUFF_TBL *)
-jpeg_alloc_huff_table (j_common_ptr cinfo)
-{
-  JHUFF_TBL *tbl;
-
-  tbl = (JHUFF_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
-
-
-/*
- * Set up the standard Huffman tables (cf. JPEG standard section K.3).
- * IMPORTANT: these are only valid for 8-bit data precision!
- * (Would jutils.c be a more reasonable place to put this?)
- */
-
-GLOBAL(JHUFF_TBL *)
-jpeg_std_huff_table (j_common_ptr cinfo, boolean isDC, int tblno)
-{
-  JHUFF_TBL **htblptr, *htbl;
-  const UINT8 *bits, *val;
-  int nsymbols, len;
-
-  static const UINT8 bits_dc_luminance[17] =
-    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_luminance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-  static const UINT8 bits_dc_chrominance[17] =
-    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_chrominance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-  static const UINT8 bits_ac_luminance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
-  static const UINT8 val_ac_luminance[] =
-    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-
-  static const UINT8 bits_ac_chrominance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
-  static const UINT8 val_ac_chrominance[] =
-    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-
-  if (cinfo->is_decompressor) {
-    if (isDC)
-      htblptr = ((j_decompress_ptr) cinfo)->dc_huff_tbl_ptrs;
-    else
-      htblptr = ((j_decompress_ptr) cinfo)->ac_huff_tbl_ptrs;
-  } else {
-    if (isDC)
-      htblptr = ((j_compress_ptr) cinfo)->dc_huff_tbl_ptrs;
-    else
-      htblptr = ((j_compress_ptr) cinfo)->ac_huff_tbl_ptrs;
-  }
-
-  switch (tblno) {
-  case 0:
-    if (isDC) {
-      bits = bits_dc_luminance;
-      val = val_dc_luminance;
-    } else {
-      bits = bits_ac_luminance;
-      val = val_ac_luminance;
-    }
-    break;
-  case 1:
-    if (isDC) {
-      bits = bits_dc_chrominance;
-      val = val_dc_chrominance;
-    } else {
-      bits = bits_ac_chrominance;
-      val = val_ac_chrominance;
-    }
-    break;
-  default:
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-    return NULL; /* avoid compiler warnings for uninitialized variables */
-  }
-
-  if (htblptr[tblno] == NULL)
-    htblptr[tblno] = jpeg_alloc_huff_table(cinfo);
-
-  htbl = htblptr[tblno];
-
-  /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-
-  /* Validate the counts.  We do this here mainly so we can copy the right
-   * number of symbols from the val[] array, without risking marching off
-   * the end of memory.  jxhuff.c will do a more thorough test later.
-   */
-  nsymbols = 0;
-  for (len = 1; len <= 16; len++)
-    nsymbols += bits[len];
-  if (nsymbols > 256)
-    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-  if (nsymbols > 0)
-    MEMCOPY(htbl->huffval, val, nsymbols * SIZEOF(UINT8));
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  htbl->sent_table = FALSE;
-
-  return htbl;
-}
diff --git a/dep/libjpeg/src/jcparam.c b/dep/libjpeg/src/jcparam.c
deleted file mode 100644
index 261ae86ca..000000000
--- a/dep/libjpeg/src/jcparam.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * jcparam.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains optional default-setting code for the JPEG compressor.
- * Applications do not have to use this file, but those that don't use it
- * must know a lot more about the innards of the JPEG code.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Quantization table setup routines
- */
-
-GLOBAL(void)
-jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
-/* Define a quantization table equal to the basic_table times
- * a scale factor (given as a percentage).
- * If force_baseline is TRUE, the computed quantization table entries
- * are limited to 1..255 for JPEG baseline compatibility.
- */
-{
-  JQUANT_TBL ** qtblptr;
-  int i;
-  long temp;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
-    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
-
-  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
-
-  if (*qtblptr == NULL)
-    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-
-  for (i = 0; i < DCTSIZE2; i++) {
-    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
-    /* limit the values to the valid range */
-    if (temp <= 0L) temp = 1L;
-    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
-    if (force_baseline && temp > 255L)
-      temp = 255L;		/* limit to baseline range if requested */
-    (*qtblptr)->quantval[i] = (UINT16) temp;
-  }
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  (*qtblptr)->sent_table = FALSE;
-}
-
-
-/* These are the sample quantization tables given in JPEG spec section K.1.
- * NOTE: chrominance DC value is changed from 17 to 16 for lossless support.
- * The spec says that the values given produce "good" quality,
- * and when divided by 2, "very good" quality.
- */
-static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
-  16,  11,  10,  16,  24,  40,  51,  61,
-  12,  12,  14,  19,  26,  58,  60,  55,
-  14,  13,  16,  24,  40,  57,  69,  56,
-  14,  17,  22,  29,  51,  87,  80,  62,
-  18,  22,  37,  56,  68, 109, 103,  77,
-  24,  35,  55,  64,  81, 104, 113,  92,
-  49,  64,  78,  87, 103, 121, 120, 101,
-  72,  92,  95,  98, 112, 100, 103,  99
-};
-static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
-  16,  18,  24,  47,  99,  99,  99,  99,
-  18,  21,  26,  66,  99,  99,  99,  99,
-  24,  26,  56,  99,  99,  99,  99,  99,
-  47,  66,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99
-};
-
-
-GLOBAL(void)
-jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and straight percentage-scaling quality scales.
- * This entry point allows different scalings for luminance and chrominance.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       cinfo->q_scale_factor[0], force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       cinfo->q_scale_factor[1], force_baseline);
-}
-
-
-GLOBAL(void)
-jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and a straight percentage-scaling quality scale.  In most cases it's better
- * to use jpeg_set_quality (below); this entry point is provided for
- * applications that insist on a linear percentage scaling.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       scale_factor, force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       scale_factor, force_baseline);
-}
-
-
-GLOBAL(int)
-jpeg_quality_scaling (int quality)
-/* Convert a user-specified quality rating to a percentage scaling factor
- * for an underlying quantization table, using our recommended scaling curve.
- * The input 'quality' factor should be 0 (terrible) to 100 (very good).
- */
-{
-  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
-  if (quality <= 0) quality = 1;
-  if (quality > 100) quality = 100;
-
-  /* The basic table is used as-is (scaling 100) for a quality of 50.
-   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
-   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
-   * to make all the table entries 1 (hence, minimum quantization loss).
-   * Qualities 1..50 are converted to scaling percentage 5000/Q.
-   */
-  if (quality < 50)
-    quality = 5000 / quality;
-  else
-    quality = 200 - quality*2;
-
-  return quality;
-}
-
-
-GLOBAL(void)
-jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables.
- * This is the standard quality-adjusting entry point for typical user
- * interfaces; only those who want detailed control over quantization tables
- * would use the preceding routines directly.
- */
-{
-  /* Convert user 0-100 rating to percentage scaling */
-  quality = jpeg_quality_scaling(quality);
-
-  /* Set up standard quality tables */
-  jpeg_set_linear_quality(cinfo, quality, force_baseline);
-}
-
-
-/*
- * Reset standard Huffman tables
- */
-
-LOCAL(void)
-std_huff_tables (j_compress_ptr cinfo)
-{
-  if (cinfo->dc_huff_tbl_ptrs[0] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 0);
-
-  if (cinfo->ac_huff_tbl_ptrs[0] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 0);
-
-  if (cinfo->dc_huff_tbl_ptrs[1] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 1);
-
-  if (cinfo->ac_huff_tbl_ptrs[1] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 1);
-}
-
-
-/*
- * Default parameter setup for compression.
- *
- * Applications that don't choose to use this routine must do their
- * own setup of all these parameters.  Alternately, you can call this
- * to establish defaults and then alter parameters selectively.  This
- * is the recommended approach since, if we add any new parameters,
- * your code will still work (they'll be set to reasonable defaults).
- */
-
-GLOBAL(void)
-jpeg_set_defaults (j_compress_ptr cinfo)
-{
-  int i;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Allocate comp_info array large enough for maximum component count.
-   * Array is made permanent in case application wants to compress
-   * multiple images at same param settings.
-   */
-  if (cinfo->comp_info == NULL)
-    cinfo->comp_info = (jpeg_component_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
-
-  /* Initialize everything not dependent on the color space */
-
-  cinfo->scale_num = 1;		/* 1:1 scaling */
-  cinfo->scale_denom = 1;
-  cinfo->data_precision = BITS_IN_JSAMPLE;
-  /* Set up two quantization tables using default quality of 75 */
-  jpeg_set_quality(cinfo, 75, TRUE);
-  /* Reset standard Huffman tables */
-  std_huff_tables(cinfo);
-
-  /* Initialize default arithmetic coding conditioning */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-
-  /* Default is no multiple-scan output */
-  cinfo->scan_info = NULL;
-  cinfo->num_scans = 0;
-
-  /* Expect normal source image, not raw downsampled data */
-  cinfo->raw_data_in = FALSE;
-
-  /* The standard Huffman tables are only valid for 8-bit data precision.
-   * If the precision is higher, use arithmetic coding.
-   * (Alternatively, using Huffman coding would be possible with forcing
-   * optimization on so that usable tables will be computed, or by
-   * supplying default tables that are valid for the desired precision.)
-   * Otherwise, use Huffman coding by default.
-   */
-  cinfo->arith_code = cinfo->data_precision > 8 ? TRUE : FALSE;
-
-  /* By default, don't do extra passes to optimize entropy coding */
-  cinfo->optimize_coding = FALSE;
-
-  /* By default, use the simpler non-cosited sampling alignment */
-  cinfo->CCIR601_sampling = FALSE;
-
-  /* By default, apply fancy downsampling */
-  cinfo->do_fancy_downsampling = TRUE;
-
-  /* No input smoothing */
-  cinfo->smoothing_factor = 0;
-
-  /* DCT algorithm preference */
-  cinfo->dct_method = JDCT_DEFAULT;
-
-  /* No restart markers */
-  cinfo->restart_interval = 0;
-  cinfo->restart_in_rows = 0;
-
-  /* Fill in default JFIF marker parameters.  Note that whether the marker
-   * will actually be written is determined by jpeg_set_colorspace.
-   *
-   * By default, the library emits JFIF version code 1.01.
-   * An application that wants to emit JFIF 1.02 extension markers should set
-   * JFIF_minor_version to 2.  We could probably get away with just defaulting
-   * to 1.02, but there may still be some decoders in use that will complain
-   * about that; saying 1.01 should minimize compatibility problems.
-   *
-   * For wide gamut colorspaces (BG_RGB and BG_YCC), the major version will be
-   * overridden by jpeg_set_colorspace and set to 2.
-   */
-  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
-  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
-  cinfo->Y_density = 1;
-
-  /* No color transform */
-  cinfo->color_transform = JCT_NONE;
-
-  /* Choose JPEG colorspace based on input space, set defaults accordingly */
-
-  jpeg_default_colorspace(cinfo);
-}
-
-
-/*
- * Select an appropriate JPEG colorspace for in_color_space.
- */
-
-GLOBAL(void)
-jpeg_default_colorspace (j_compress_ptr cinfo)
-{
-  switch (cinfo->in_color_space) {
-  case JCS_UNKNOWN:
-    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
-    break;
-  case JCS_GRAYSCALE:
-    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
-    break;
-  case JCS_RGB:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_YCbCr:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_CMYK:
-    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
-    break;
-  case JCS_YCCK:
-    jpeg_set_colorspace(cinfo, JCS_YCCK);
-    break;
-  case JCS_BG_RGB:
-    /* No translation for now -- conversion to BG_YCC not yet supportet */
-    jpeg_set_colorspace(cinfo, JCS_BG_RGB);
-    break;
-  case JCS_BG_YCC:
-    jpeg_set_colorspace(cinfo, JCS_BG_YCC);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-  }
-}
-
-
-/*
- * Set the JPEG colorspace, and choose colorspace-dependent default values.
- */
-
-GLOBAL(void)
-jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
-{
-  jpeg_component_info * compptr;
-  int ci;
-
-#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
-  (compptr = &cinfo->comp_info[index], \
-   compptr->component_id = (id), \
-   compptr->h_samp_factor = (hsamp), \
-   compptr->v_samp_factor = (vsamp), \
-   compptr->quant_tbl_no = (quant), \
-   compptr->dc_tbl_no = (dctbl), \
-   compptr->ac_tbl_no = (actbl) )
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
-   * tables 1 for chrominance components.
-   */
-
-  cinfo->jpeg_color_space = colorspace;
-
-  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
-  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
-
-  switch (colorspace) {
-  case JCS_UNKNOWN:
-    cinfo->num_components = cinfo->input_components;
-    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      SET_COMP(ci, ci, 1,1, 0, 0,0);
-    }
-    break;
-  case JCS_GRAYSCALE:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 1;
-    /* JFIF specifies component ID 1 */
-    SET_COMP(0, 0x01, 1,1, 0, 0,0);
-    break;
-  case JCS_RGB:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
-    cinfo->num_components = 3;
-    SET_COMP(0, 0x52 /* 'R' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x42 /* 'B' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    break;
-  case JCS_YCbCr:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 3;
-    /* JFIF specifies component IDs 1,2,3 */
-    /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x02, 1,1, 1, 1,1);
-    SET_COMP(2, 0x03, 1,1, 1, 1,1);
-    break;
-  case JCS_CMYK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
-    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
-    break;
-  case JCS_YCCK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x02, 1,1, 1, 1,1);
-    SET_COMP(2, 0x03, 1,1, 1, 1,1);
-    SET_COMP(3, 0x04, 2,2, 0, 0,0);
-    break;
-  case JCS_BG_RGB:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
-    cinfo->num_components = 3;
-    /* Add offset 0x20 to the normal R/G/B component IDs */
-    SET_COMP(0, 0x72 /* 'r' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(1, 0x67 /* 'g' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x62 /* 'b' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    break;
-  case JCS_BG_YCC:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
-    cinfo->num_components = 3;
-    /* Add offset 0x20 to the normal Cb/Cr component IDs */
-    /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x22, 1,1, 1, 1,1);
-    SET_COMP(2, 0x23, 1,1, 1, 1,1);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-  }
-}
-
-
-#ifdef C_PROGRESSIVE_SUPPORTED
-
-LOCAL(jpeg_scan_info *)
-fill_a_scan (jpeg_scan_info * scanptr, int ci,
-	     int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for specified component */
-{
-  scanptr->comps_in_scan = 1;
-  scanptr->component_index[0] = ci;
-  scanptr->Ss = Ss;
-  scanptr->Se = Se;
-  scanptr->Ah = Ah;
-  scanptr->Al = Al;
-  scanptr++;
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_scans (jpeg_scan_info * scanptr, int ncomps,
-	    int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for each component */
-{
-  int ci;
-
-  for (ci = 0; ci < ncomps; ci++) {
-    scanptr->comps_in_scan = 1;
-    scanptr->component_index[0] = ci;
-    scanptr->Ss = Ss;
-    scanptr->Se = Se;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  }
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
-/* Support routine: generate interleaved DC scan if possible, else N scans */
-{
-  int ci;
-
-  if (ncomps <= MAX_COMPS_IN_SCAN) {
-    /* Single interleaved DC scan */
-    scanptr->comps_in_scan = ncomps;
-    for (ci = 0; ci < ncomps; ci++)
-      scanptr->component_index[ci] = ci;
-    scanptr->Ss = scanptr->Se = 0;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  } else {
-    /* Noninterleaved DC scan for each component */
-    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
-  }
-  return scanptr;
-}
-
-
-/*
- * Create a recommended progressive-JPEG script.
- * cinfo->num_components and cinfo->jpeg_color_space must be correct.
- */
-
-GLOBAL(void)
-jpeg_simple_progression (j_compress_ptr cinfo)
-{
-  int ncomps = cinfo->num_components;
-  int nscans;
-  jpeg_scan_info * scanptr;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Figure space needed for script.  Calculation must match code below! */
-  if (ncomps == 3 &&
-      (cinfo->jpeg_color_space == JCS_YCbCr ||
-       cinfo->jpeg_color_space == JCS_BG_YCC)) {
-    /* Custom script for YCC color images. */
-    nscans = 10;
-  } else {
-    /* All-purpose script for other color spaces. */
-    if (ncomps > MAX_COMPS_IN_SCAN)
-      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
-    else
-      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
-  }
-
-  /* Allocate space for script.
-   * We need to put it in the permanent pool in case the application performs
-   * multiple compressions without changing the settings.  To avoid a memory
-   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
-   * object, we try to re-use previously allocated space, and we allocate
-   * enough space to handle YCC even if initially asked for grayscale.
-   */
-  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
-    cinfo->script_space_size = MAX(nscans, 10);
-    cinfo->script_space = (jpeg_scan_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
-  }
-  scanptr = cinfo->script_space;
-  cinfo->scan_info = scanptr;
-  cinfo->num_scans = nscans;
-
-  if (ncomps == 3 &&
-      (cinfo->jpeg_color_space == JCS_YCbCr ||
-       cinfo->jpeg_color_space == JCS_BG_YCC)) {
-    /* Custom script for YCC color images. */
-    /* Initial DC scan */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    /* Initial AC scan: get some luma data out in a hurry */
-    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
-    /* Chroma data is too small to be worth expending many scans on */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
-    /* Complete spectral selection for luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
-    /* Refine next bit of luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
-    /* Finish DC successive approximation */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    /* Finish AC successive approximation */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
-    /* Luma bottom bit comes last since it's usually largest scan */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
-  } else {
-    /* All-purpose script for other color spaces. */
-    /* Successive approximation first pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
-    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
-    /* Successive approximation second pass */
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
-    /* Successive approximation final pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
-  }
-}
-
-#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/dep/libjpeg/src/jcprepct.c b/dep/libjpeg/src/jcprepct.c
deleted file mode 100644
index 586964bd4..000000000
--- a/dep/libjpeg/src/jcprepct.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * jcprepct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the compression preprocessing controller.
- * This controller manages the color conversion, downsampling,
- * and edge expansion steps.
- *
- * Most of the complexity here is associated with buffering input rows
- * as required by the downsampler.  See the comments at the head of
- * jcsample.c for the downsampler's needs.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* At present, jcsample.c can request context rows only for smoothing.
- * In the future, we might also need context rows for CCIR601 sampling
- * or other more-complex downsampling procedures.  The code to support
- * context rows should be compiled only if needed.
- */
-#ifdef INPUT_SMOOTHING_SUPPORTED
-#define CONTEXT_ROWS_SUPPORTED
-#endif
-
-
-/*
- * For the simple (no-context-row) case, we just need to buffer one
- * row group's worth of pixels for the downsampling step.  At the bottom of
- * the image, we pad to a full row group by replicating the last pixel row.
- * The downsampler's last output row is then replicated if needed to pad
- * out to a full iMCU row.
- *
- * When providing context rows, we must buffer three row groups' worth of
- * pixels.  Three row groups are physically allocated, but the row pointer
- * arrays are made five row groups high, with the extra pointers above and
- * below "wrapping around" to point to the last and first real row groups.
- * This allows the downsampler to access the proper context rows.
- * At the top and bottom of the image, we create dummy context rows by
- * copying the first or last real pixel row.  This copying could be avoided
- * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
- * trouble on the compression side.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_prep_controller pub; /* public fields */
-
-  /* Downsampling input buffer.  This buffer holds color-converted data
-   * until we have enough to do a downsample step.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
-  int next_buf_row;		/* index of next row to store in color_buf */
-
-#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
-  int this_row_group;		/* starting row index of group to process */
-  int next_buf_stop;		/* downsample when we reach this index */
-#endif
-} my_prep_controller;
-
-typedef my_prep_controller * my_prep_ptr;
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-
-  if (pass_mode != JBUF_PASS_THRU)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Initialize total-height counter for detecting bottom of image */
-  prep->rows_to_go = cinfo->image_height;
-  /* Mark the conversion buffer empty */
-  prep->next_buf_row = 0;
-#ifdef CONTEXT_ROWS_SUPPORTED
-  /* Preset additional state variables for context mode.
-   * These aren't used in non-context mode, so we needn't test which mode.
-   */
-  prep->this_row_group = 0;
-  /* Set next_buf_stop to stop after two row groups have been read in. */
-  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
-#endif
-}
-
-
-/*
- * Expand an image vertically from height input_rows to height output_rows,
- * by duplicating the bottom row.
- */
-
-LOCAL(void)
-expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
-		    int input_rows, int output_rows)
-{
-  register int row;
-
-  for (row = input_rows; row < output_rows; row++) {
-    jcopy_sample_rows(image_data + input_rows - 1,
-		      image_data + row,
-		      1, num_cols);
-  }
-}
-
-
-/*
- * Process some data in the simple no-context case.
- *
- * Preprocessor output data is counted in "row groups".  A row group
- * is defined to be v_samp_factor sample rows of each component.
- * Downsampling will produce this much data from each max_v_samp_factor
- * input rows.
- */
-
-METHODDEF(void)
-pre_process_data (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		  JDIMENSION in_rows_avail,
-		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		  JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  JDIMENSION inrows;
-  jpeg_component_info * compptr;
-
-  while (*in_row_ctr < in_rows_avail &&
-	 *out_row_group_ctr < out_row_groups_avail) {
-    /* Do color conversion to fill the conversion buffer. */
-    inrows = in_rows_avail - *in_row_ctr;
-    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
-    numrows = (int) MIN((JDIMENSION) numrows, inrows);
-    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-				       prep->color_buf,
-				       (JDIMENSION) prep->next_buf_row,
-				       numrows);
-    *in_row_ctr += numrows;
-    prep->next_buf_row += numrows;
-    prep->rows_to_go -= numrows;
-    /* If at bottom of image, pad to fill the conversion buffer. */
-    if (prep->rows_to_go == 0 &&
-	prep->next_buf_row < cinfo->max_v_samp_factor) {
-      for (ci = 0; ci < cinfo->num_components; ci++) {
-	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			   prep->next_buf_row, cinfo->max_v_samp_factor);
-      }
-      prep->next_buf_row = cinfo->max_v_samp_factor;
-    }
-    /* If we've filled the conversion buffer, empty it. */
-    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf, (JDIMENSION) 0,
-					output_buf, *out_row_group_ctr);
-      prep->next_buf_row = 0;
-      (*out_row_group_ctr)++;
-    }
-    /* If at bottom of image, pad the output to a full iMCU height.
-     * Note we assume the caller is providing a one-iMCU-height output buffer!
-     */
-    if (prep->rows_to_go == 0 &&
-	*out_row_group_ctr < out_row_groups_avail) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-	expand_bottom_edge(output_buf[ci],
-			   compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-			   (int) (*out_row_group_ctr * numrows),
-			   (int) (out_row_groups_avail * numrows));
-      }
-      *out_row_group_ctr = out_row_groups_avail;
-      break;			/* can exit outer loop without test */
-    }
-  }
-}
-
-
-#ifdef CONTEXT_ROWS_SUPPORTED
-
-/*
- * Process some data in the context case.
- */
-
-METHODDEF(void)
-pre_process_context (j_compress_ptr cinfo,
-		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		     JDIMENSION in_rows_avail,
-		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		     JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  int buf_height = cinfo->max_v_samp_factor * 3;
-  JDIMENSION inrows;
-
-  while (*out_row_group_ctr < out_row_groups_avail) {
-    if (*in_row_ctr < in_rows_avail) {
-      /* Do color conversion to fill the conversion buffer. */
-      inrows = in_rows_avail - *in_row_ctr;
-      numrows = prep->next_buf_stop - prep->next_buf_row;
-      numrows = (int) MIN((JDIMENSION) numrows, inrows);
-      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-					 prep->color_buf,
-					 (JDIMENSION) prep->next_buf_row,
-					 numrows);
-      /* Pad at top of image, if first time through */
-      if (prep->rows_to_go == cinfo->image_height) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  int row;
-	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-	    jcopy_sample_rows(prep->color_buf[ci],
-			      prep->color_buf[ci] - row,
-			      1, cinfo->image_width);
-	  }
-	}
-      }
-      *in_row_ctr += numrows;
-      prep->next_buf_row += numrows;
-      prep->rows_to_go -= numrows;
-    } else {
-      /* Return for more data, unless we are at the bottom of the image. */
-      if (prep->rows_to_go != 0)
-	break;
-      /* When at bottom of image, pad to fill the conversion buffer. */
-      if (prep->next_buf_row < prep->next_buf_stop) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			     prep->next_buf_row, prep->next_buf_stop);
-	}
-	prep->next_buf_row = prep->next_buf_stop;
-      }
-    }
-    /* If we've gotten enough data, downsample a row group. */
-    if (prep->next_buf_row == prep->next_buf_stop) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf,
-					(JDIMENSION) prep->this_row_group,
-					output_buf, *out_row_group_ctr);
-      (*out_row_group_ctr)++;
-      /* Advance pointers with wraparound as necessary. */
-      prep->this_row_group += cinfo->max_v_samp_factor;
-      if (prep->this_row_group >= buf_height)
-	prep->this_row_group = 0;
-      if (prep->next_buf_row >= buf_height)
-	prep->next_buf_row = 0;
-      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
-    }
-  }
-}
-
-
-/*
- * Create the wrapped-around downsampling input buffer needed for context mode.
- */
-
-LOCAL(void)
-create_context_buffer (j_compress_ptr cinfo)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int rgroup_height = cinfo->max_v_samp_factor;
-  int ci, i;
-  jpeg_component_info * compptr;
-  JSAMPARRAY true_buffer, fake_buffer;
-
-  /* Grab enough space for fake row pointers for all the components;
-   * we need five row groups' worth of pointers for each component.
-   */
-  fake_buffer = (JSAMPARRAY) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (cinfo->num_components * 5 * rgroup_height) * SIZEOF(JSAMPROW));
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate the actual buffer space (3 row groups) for this component.
-     * We make the buffer wide enough to allow the downsampler to edge-expand
-     * horizontally within the buffer, if it so chooses.
-     */
-    true_buffer = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) (((long) compptr->width_in_blocks *
-		      cinfo->min_DCT_h_scaled_size *
-		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-       (JDIMENSION) (3 * rgroup_height));
-    /* Copy true buffer row pointers into the middle of the fake row array */
-    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-	    3 * rgroup_height * SIZEOF(JSAMPROW));
-    /* Fill in the above and below wraparound pointers */
-    for (i = 0; i < rgroup_height; i++) {
-      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
-      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
-    }
-    prep->color_buf[ci] = fake_buffer + rgroup_height;
-    fake_buffer += 5 * rgroup_height; /* point to space for next component */
-  }
-}
-
-#endif /* CONTEXT_ROWS_SUPPORTED */
-
-
-/*
- * Initialize preprocessing controller.
- */
-
-GLOBAL(void)
-jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_prep_ptr prep;
-  int ci;
-  jpeg_component_info * compptr;
-
-  if (need_full_buffer)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  prep = (my_prep_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_prep_controller));
-  cinfo->prep = &prep->pub;
-  prep->pub.start_pass = start_pass_prep;
-
-  /* Allocate the color conversion buffer.
-   * We make the buffer wide enough to allow the downsampler to edge-expand
-   * horizontally within the buffer, if it so chooses.
-   */
-  if (cinfo->downsample->need_context_rows) {
-    /* Set up to provide context rows */
-#ifdef CONTEXT_ROWS_SUPPORTED
-    prep->pub.pre_process_data = pre_process_context;
-    create_context_buffer(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* No context, just make it tall enough for one row group */
-    prep->pub.pre_process_data = pre_process_data;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) (((long) compptr->width_in_blocks *
-			cinfo->min_DCT_h_scaled_size *
-			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
-    }
-  }
-}
diff --git a/dep/libjpeg/src/jcsample.c b/dep/libjpeg/src/jcsample.c
deleted file mode 100644
index 2372c4173..000000000
--- a/dep/libjpeg/src/jcsample.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * jcsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains downsampling routines.
- *
- * Downsampling input data is counted in "row groups".  A row group
- * is defined to be max_v_samp_factor pixel rows of each component,
- * from which the downsampler produces v_samp_factor sample rows.
- * A single row group is processed in each call to the downsampler module.
- *
- * The downsampler is responsible for edge-expansion of its output data
- * to fill an integral number of DCT blocks horizontally.  The source buffer
- * may be modified if it is helpful for this purpose (the source buffer is
- * allocated wide enough to correspond to the desired output width).
- * The caller (the prep controller) is responsible for vertical padding.
- *
- * The downsampler may request "context rows" by setting need_context_rows
- * during startup.  In this case, the input arrays will contain at least
- * one row group's worth of pixels above and below the passed-in data;
- * the caller will create dummy rows at image top and bottom by replicating
- * the first or last real pixel row.
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- *
- * The downsampling algorithm used here is a simple average of the source
- * pixels covered by the output pixel.  The hi-falutin sampling literature
- * refers to this as a "box filter".  In general the characteristics of a box
- * filter are not very good, but for the specific cases we normally use (1:1
- * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
- * nearly so bad.  If you intend to use other sampling ratios, you'd be well
- * advised to improve this code.
- *
- * A simple input-smoothing capability is provided.  This is mainly intended
- * for cleaning up color-dithered GIF input files (if you find it inadequate,
- * we suggest using an external filtering program such as pnmconvol).  When
- * enabled, each input pixel P is replaced by a weighted sum of itself and its
- * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
- * where SF = (smoothing_factor / 1024).
- * Currently, smoothing is only supported for 2h2v sampling factors.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to downsample a single component */
-typedef JMETHOD(void, downsample1_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_downsampler pub;	/* public fields */
-
-  /* Downsampling method pointers, one per component */
-  downsample1_ptr methods[MAX_COMPONENTS];
-
-  /* Height of an output row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_downsample need not
-   * recompute them each time.  They are unused for other downsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_downsampler;
-
-typedef my_downsampler * my_downsample_ptr;
-
-
-/*
- * Initialize for a downsampling pass.
- */
-
-METHODDEF(void)
-start_pass_downsample (j_compress_ptr cinfo)
-{
-  /* no work for now */
-}
-
-
-/*
- * Expand a component horizontally from width input_cols to width output_cols,
- * by duplicating the rightmost samples.
- */
-
-LOCAL(void)
-expand_right_edge (JSAMPARRAY image_data, int num_rows,
-		   JDIMENSION input_cols, JDIMENSION output_cols)
-{
-  register JSAMPROW ptr;
-  register JSAMPLE pixval;
-  register int count;
-  int row;
-  int numcols = (int) (output_cols - input_cols);
-
-  if (numcols > 0) {
-    for (row = 0; row < num_rows; row++) {
-      ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
-      for (count = numcols; count > 0; count--)
-	*ptr++ = pixval;
-    }
-  }
-}
-
-
-/*
- * Do downsampling for a whole row group (all components).
- *
- * In this version we simply downsample each component independently.
- */
-
-METHODDEF(void)
-sep_downsample (j_compress_ptr cinfo,
-		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JSAMPARRAY in_ptr, out_ptr;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    in_ptr = input_buf[ci] + in_row_index;
-    out_ptr = output_buf[ci] +
-	      (out_row_group_index * downsample->rowgroup_height[ci]);
-    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * One row group is processed per call.
- * This version handles arbitrary integral sampling ratios, without smoothing.
- * Note that this version is not actually used for customary sampling ratios.
- */
-
-METHODDEF(void)
-int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
-  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  JSAMPROW inptr, outptr;
-  INT32 outvalue;
-
-  h_expand = downsample->h_expand[compptr->component_index];
-  v_expand = downsample->v_expand[compptr->component_index];
-  numpix = h_expand * v_expand;
-  numpix2 = numpix/2;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * h_expand);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    for (outcol = 0, outcol_h = 0; outcol < output_cols;
-	 outcol++, outcol_h += h_expand) {
-      outvalue = 0;
-      for (v = 0; v < v_expand; v++) {
-	inptr = input_data[inrow+v] + outcol_h;
-	for (h = 0; h < h_expand; h++) {
-	  outvalue += (INT32) GETJSAMPLE(*inptr++);
-	}
-      }
-      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
-    }
-    inrow += v_expand;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * without smoothing.
- */
-
-METHODDEF(void)
-fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  /* Copy the data */
-  jcopy_sample_rows(input_data, output_data,
-		    cinfo->max_v_samp_factor, cinfo->image_width);
-  /* Edge-expand */
-  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
-		    compptr->width_in_blocks * compptr->DCT_h_scaled_size);
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the common case of 2:1 horizontal and 1:1 vertical,
- * without smoothing.
- *
- * A note about the "bias" calculations: when rounding fractional values to
- * integer, we do not want to always round 0.5 up to the next integer.
- * If we did that, we'd introduce a noticeable bias towards larger values.
- * Instead, this code is arranged so that 0.5 will be rounded up or down at
- * alternate pixel locations (a simple ordered dither pattern).
- */
-
-METHODDEF(void)
-h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
-			      + bias) >> 1);
-      bias ^= 1;		/* 0=>1, 1=>0 */
-      inptr += 2;
-    }
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * without smoothing.
- */
-
-METHODDEF(void)
-h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
-			      + bias) >> 2);
-      bias ^= 3;		/* 1=>2, 2=>1 */
-      inptr0 += 2; inptr1 += 2;
-    }
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-			JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols * 2);
-
-  /* We don't bother to form the individual "smoothed" input pixel values;
-   * we can directly compute the output which is the average of the four
-   * smoothed values.  Each of the four member pixels contributes a fraction
-   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
-   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
-   * output.  The four corner-adjacent neighbor pixels contribute a fraction
-   * SF to just one smoothed pixel, or SF/4 to the final output; while the
-   * eight edge-adjacent neighbors contribute SF to each of two smoothed
-   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
-   * factors are scaled by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
-  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+2];
-
-    /* Special case for first column: pretend column -1 is same as column 0 */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      /* sum of pixels directly mapped to this output element */
-      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-      /* sum of edge-neighbor pixels */
-      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
-      /* The edge-neighbors count twice as much as corner-neighbors */
-      neighsum += neighsum;
-      /* Add in the corner-neighbors */
-      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
-      /* form final output scaled up by 2^16 */
-      membersum = membersum * memberscale + neighsum * neighscale;
-      /* round, descale and output it */
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
-			    JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-  int colsum, lastcolsum, nextcolsum;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols);
-
-  /* Each of the eight neighbor pixels contributes a fraction SF to the
-   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
-   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
-  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+1];
-
-    /* Special case for first column */
-    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-	     GETJSAMPLE(*inptr);
-    membersum = GETJSAMPLE(*inptr++);
-    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		 GETJSAMPLE(*inptr);
-    neighsum = colsum + (colsum - membersum) + nextcolsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    lastcolsum = colsum; colsum = nextcolsum;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      membersum = GETJSAMPLE(*inptr++);
-      above_ptr++; below_ptr++;
-      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		   GETJSAMPLE(*inptr);
-      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
-      membersum = membersum * memberscale + neighsum * neighscale;
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      lastcolsum = colsum; colsum = nextcolsum;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr);
-    neighsum = lastcolsum + (colsum - membersum) + colsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-  }
-}
-
-#endif /* INPUT_SMOOTHING_SUPPORTED */
-
-
-/*
- * Module initialization routine for downsampling.
- * Note that we must select a routine for each component.
- */
-
-GLOBAL(void)
-jinit_downsampler (j_compress_ptr cinfo)
-{
-  my_downsample_ptr downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  boolean smoothok = TRUE;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_downsampler));
-  cinfo->downsample = &downsample->pub;
-  downsample->pub.start_pass = start_pass_downsample;
-  downsample->pub.downsample = sep_downsample;
-  downsample->pub.need_context_rows = FALSE;
-
-  if (cinfo->CCIR601_sampling)
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, and set up method pointers */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Compute size of an "output group" for DCT scaling.  This many samples
-     * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		  cinfo->min_DCT_h_scaled_size;
-    v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-    h_in_group = cinfo->max_h_samp_factor;
-    v_in_group = cinfo->max_v_samp_factor;
-    downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
-    if (h_in_group == h_out_group && v_in_group == v_out_group) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = fullsize_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = fullsize_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group) {
-      smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group * 2) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = h2v2_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = h2v2_downsample;
-    } else if ((h_in_group % h_out_group) == 0 &&
-	       (v_in_group % v_out_group) == 0) {
-      smoothok = FALSE;
-      downsample->methods[ci] = int_downsample;
-      downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
-      downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-  }
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-  if (cinfo->smoothing_factor && !smoothok)
-    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
-#endif
-}
diff --git a/dep/libjpeg/src/jctrans.c b/dep/libjpeg/src/jctrans.c
deleted file mode 100644
index 261dd2996..000000000
--- a/dep/libjpeg/src/jctrans.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * jctrans.c
- *
- * Copyright (C) 1995-1998, Thomas G. Lane.
- * Modified 2000-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding compression,
- * that is, writing raw DCT coefficient arrays to an output JPEG file.
- * The routines in jcapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transencode_master_selection
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-LOCAL(void) transencode_coef_controller
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-
-
-/*
- * Compression initialization for writing raw-coefficient data.
- * Before calling this, all parameters and a data destination must be set up.
- * Call jpeg_finish_compress() to actually write the data.
- *
- * The number of passed virtual arrays must match cinfo->num_components.
- * Note that the virtual arrays need not be filled or even realized at
- * the time write_coefficients is called; indeed, if the virtual arrays
- * were requested from this compression object's memory manager, they
- * typically will be realized during this routine and filled afterwards.
- */
-
-GLOBAL(void)
-jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Mark all tables to be written */
-  jpeg_suppress_tables(cinfo, FALSE);
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  transencode_master_selection(cinfo, coef_arrays);
-  /* Wait for jpeg_finish_compress() call */
-  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
-  cinfo->global_state = CSTATE_WRCOEFS;
-}
-
-
-/*
- * Initialize the compression object with default parameters,
- * then copy from the source object all parameters needed for lossless
- * transcoding.  Parameters that can be varied without loss (such as
- * scan script and Huffman optimization) are left in their default states.
- */
-
-GLOBAL(void)
-jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
-			       j_compress_ptr dstinfo)
-{
-  JQUANT_TBL ** qtblptr;
-  jpeg_component_info *incomp, *outcomp;
-  JQUANT_TBL *c_quant, *slot_quant;
-  int tblno, ci, coefi;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (dstinfo->global_state != CSTATE_START)
-    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
-  /* Copy fundamental image dimensions */
-  dstinfo->image_width = srcinfo->image_width;
-  dstinfo->image_height = srcinfo->image_height;
-  dstinfo->input_components = srcinfo->num_components;
-  dstinfo->in_color_space = srcinfo->jpeg_color_space;
-  dstinfo->jpeg_width = srcinfo->output_width;
-  dstinfo->jpeg_height = srcinfo->output_height;
-  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
-  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
-  /* Initialize all parameters to default values */
-  jpeg_set_defaults(dstinfo);
-  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
-   * Fix it to get the right header markers for the image colorspace.
-   * Note: Entropy table assignment in jpeg_set_colorspace
-   * depends on color_transform.
-   * Adaption is also required for setting the appropriate
-   * entropy coding mode dependent on image data precision.
-   */
-  dstinfo->color_transform = srcinfo->color_transform;
-  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
-  dstinfo->data_precision = srcinfo->data_precision;
-  dstinfo->arith_code = srcinfo->data_precision > 8 ? TRUE : FALSE;
-  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
-  /* Copy the source's quantization tables. */
-  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
-    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
-      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
-      if (*qtblptr == NULL)
-	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
-      MEMCOPY((*qtblptr)->quantval,
-	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
-	      SIZEOF((*qtblptr)->quantval));
-      (*qtblptr)->sent_table = FALSE;
-    }
-  }
-  /* Copy the source's per-component info.
-   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
-   */
-  dstinfo->num_components = srcinfo->num_components;
-  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
-	     MAX_COMPONENTS);
-  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
-       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
-    outcomp->component_id = incomp->component_id;
-    outcomp->h_samp_factor = incomp->h_samp_factor;
-    outcomp->v_samp_factor = incomp->v_samp_factor;
-    outcomp->quant_tbl_no = incomp->quant_tbl_no;
-    /* Make sure saved quantization table for component matches the qtable
-     * slot.  If not, the input file re-used this qtable slot.
-     * IJG encoder currently cannot duplicate this.
-     */
-    tblno = outcomp->quant_tbl_no;
-    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
-	srcinfo->quant_tbl_ptrs[tblno] == NULL)
-      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
-    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
-    c_quant = incomp->quant_table;
-    if (c_quant != NULL) {
-      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
-	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
-	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
-      }
-    }
-    /* Note: we do not copy the source's entropy table assignments;
-     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
-     */
-  }
-  /* Also copy JFIF version and resolution information, if available.
-   * Strictly speaking this isn't "critical" info, but it's nearly
-   * always appropriate to copy it if available.  In particular,
-   * if the application chooses to copy JFIF 1.02 extension markers from
-   * the source file, we need to copy the version to make sure we don't
-   * emit a file that has 1.02 extensions but a claimed version of 1.01.
-   */
-  if (srcinfo->saw_JFIF_marker) {
-    if (srcinfo->JFIF_major_version == 1 ||
-	srcinfo->JFIF_major_version == 2) {
-      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
-      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
-    }
-    dstinfo->density_unit = srcinfo->density_unit;
-    dstinfo->X_density = srcinfo->X_density;
-    dstinfo->Y_density = srcinfo->Y_density;
-  }
-}
-
-
-LOCAL(void)
-jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	     cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
-
-  cinfo->block_size = cinfo->min_DCT_h_scaled_size;
-}
-
-
-/*
- * Master selection of compression modules for transcoding.
- * This substitutes for jcinit.c's initialization of the full compressor.
- */
-
-LOCAL(void)
-transencode_master_selection (j_compress_ptr cinfo,
-			      jvirt_barray_ptr * coef_arrays)
-{
-  /* Do computations that are needed before master selection phase */
-  jpeg_calc_trans_dimensions(cinfo);
-
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, TRUE /* transcode only */);
-
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* We need a special coefficient buffer controller. */
-  transencode_coef_controller(cinfo, coef_arrays);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI, JFIF) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
-
-
-/*
- * The rest of this file is a special implementation of the coefficient
- * buffer controller.  This is similar to jccoefct.c, but it handles only
- * output from presupplied virtual arrays.  Furthermore, we generate any
- * dummy padding blocks on-the-fly rather than expecting them to be present
- * in the arrays.
- */
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* Virtual block array for each component. */
-  jvirt_barray_ptr * whole_image;
-
-  /* Workspace for constructing dummy blocks at right/bottom edges. */
-  JBLOCK dummy_buffer[C_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  if (pass_mode != JBUF_CRANK_DEST)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Process some data.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, ci, xindex, yindex, yoffset, blockcnt;
-  JDIMENSION start_col;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    /* Fill in pointers to real blocks in this row */
-	    buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	    xindex = blockcnt;
-	    do {
-	      MCU_buffer[blkn++] = buffer_ptr++;
-	    } while (--xindex);
-	    /* Dummy blocks at right edge */
-	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
-	      continue;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = compptr->MCU_width;
-	  }
-	  /* Fill in any dummy blocks needed in this row.
-	   * Dummy blocks are filled in the same way as in jccoefct.c:
-	   * all zeroes in the AC entries, DC entries equal to previous
-	   * block's DC value.  The init routine has already zeroed the
-	   * AC entries, so we need only set the DC entries correctly.
-	   */
-	  buffer_ptr = coef->dummy_buffer + blkn;
-	  do {
-	    buffer_ptr[0][0] = MCU_buffer[blkn-1][0][0];
-	    MCU_buffer[blkn++] = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Initialize coefficient buffer controller.
- *
- * Each passed coefficient array must be the right size for that
- * coefficient: width_in_blocks wide and height_in_blocks high,
- * with unitheight at least v_samp_factor.
- */
-
-LOCAL(void)
-transencode_coef_controller (j_compress_ptr cinfo,
-			     jvirt_barray_ptr * coef_arrays)
-{
-  my_coef_ptr coef;
-
-  coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-  cinfo->coef = &coef->pub;
-  coef->pub.start_pass = start_pass_coef;
-  coef->pub.compress_data = compress_output;
-
-  /* Save pointer to virtual arrays */
-  coef->whole_image = coef_arrays;
-
-  /* Pre-zero space for dummy DCT blocks */
-  MEMZERO(coef->dummy_buffer, SIZEOF(coef->dummy_buffer));
-}
diff --git a/dep/libjpeg/src/jdapimin.c b/dep/libjpeg/src/jdapimin.c
deleted file mode 100644
index 785e52722..000000000
--- a/dep/libjpeg/src/jdapimin.c
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * jdapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2009-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-decompression case or the
- * transcoding-only case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
- * shared by compression and decompression, and jdtrans.c for the transcoding
- * case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG decompression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_decompress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = TRUE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->src = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++)
-    cinfo->quant_tbl_ptrs[i] = NULL;
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Initialize marker processor so application can override methods
-   * for COM, APPn markers before calling jpeg_read_header.
-   */
-  cinfo->marker_list = NULL;
-  jinit_marker_reader(cinfo);
-
-  /* And initialize the overall input controller. */
-  jinit_input_controller(cinfo);
-
-  /* OK, I'm ready */
-  cinfo->global_state = DSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG decompression object
- */
-
-GLOBAL(void)
-jpeg_destroy_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG decompression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Set default decompression parameters.
- */
-
-LOCAL(void)
-default_decompress_parms (j_decompress_ptr cinfo)
-{
-  int cid0, cid1, cid2, cid3;
-
-  /* Guess the input colorspace, and set output colorspace accordingly. */
-  /* Note application may override our guesses. */
-  switch (cinfo->num_components) {
-  case 1:
-    cinfo->jpeg_color_space = JCS_GRAYSCALE;
-    cinfo->out_color_space = JCS_GRAYSCALE;
-    break;
-
-  case 3:
-    cid0 = cinfo->comp_info[0].component_id;
-    cid1 = cinfo->comp_info[1].component_id;
-    cid2 = cinfo->comp_info[2].component_id;
-
-    /* For robust detection of standard colorspaces
-     * regardless of the presence of special markers,
-     * check component IDs from SOF marker first.
-     */
-    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
-      cinfo->jpeg_color_space = JCS_YCbCr;
-    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
-      cinfo->jpeg_color_space = JCS_BG_YCC;
-    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
-      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
-    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
-      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
-    else if (cinfo->saw_JFIF_marker)
-      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-    else if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_RGB;
-	break;
-      case 1:
-	cinfo->jpeg_color_space = JCS_YCbCr;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-      }
-    } else {
-      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-    }
-    /* Always guess RGB is proper output colorspace. */
-    cinfo->out_color_space = JCS_RGB;
-    break;
-
-  case 4:
-    cid0 = cinfo->comp_info[0].component_id;
-    cid1 = cinfo->comp_info[1].component_id;
-    cid2 = cinfo->comp_info[2].component_id;
-    cid3 = cinfo->comp_info[3].component_id;
-
-    /* For robust detection of standard colorspaces
-     * regardless of the presence of special markers,
-     * check component IDs from SOF marker first.
-     */
-    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03 && cid3 == 0x04)
-      cinfo->jpeg_color_space = JCS_YCCK;
-    else if (cid0 == 0x43 && cid1 == 0x4D && cid2 == 0x59 && cid3 == 0x4B)
-      cinfo->jpeg_color_space = JCS_CMYK;   /* ASCII 'C', 'M', 'Y', 'K' */
-    else if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_CMYK;
-	break;
-      case 2:
-	cinfo->jpeg_color_space = JCS_YCCK;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
-      }
-    } else {
-      /* Unknown IDs and no special markers, assume straight CMYK. */
-      cinfo->jpeg_color_space = JCS_CMYK;
-    }
-    cinfo->out_color_space = JCS_CMYK;
-    break;
-
-  default:
-    cinfo->jpeg_color_space = JCS_UNKNOWN;
-    cinfo->out_color_space = JCS_UNKNOWN;
-  }
-
-  /* Set defaults for other decompression parameters. */
-  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
-  cinfo->scale_denom = cinfo->block_size;
-  cinfo->output_gamma = 1.0;
-  cinfo->buffered_image = FALSE;
-  cinfo->raw_data_out = FALSE;
-  cinfo->dct_method = JDCT_DEFAULT;
-  cinfo->do_fancy_upsampling = TRUE;
-  cinfo->do_block_smoothing = TRUE;
-  cinfo->quantize_colors = FALSE;
-  /* We set these in case application only sets quantize_colors. */
-  cinfo->dither_mode = JDITHER_FS;
-#ifdef QUANT_2PASS_SUPPORTED
-  cinfo->two_pass_quantize = TRUE;
-#else
-  cinfo->two_pass_quantize = FALSE;
-#endif
-  cinfo->desired_number_of_colors = 256;
-  cinfo->colormap = NULL;
-  /* Initialize for no mode change in buffered-image mode. */
-  cinfo->enable_1pass_quant = FALSE;
-  cinfo->enable_external_quant = FALSE;
-  cinfo->enable_2pass_quant = FALSE;
-}
-
-
-/*
- * Decompression startup: read start of JPEG datastream to see what's there.
- * Need only initialize JPEG object and supply a data source before calling.
- *
- * This routine will read as far as the first SOS marker (ie, actual start of
- * compressed data), and will save all tables and parameters in the JPEG
- * object.  It will also initialize the decompression parameters to default
- * values, and finally return JPEG_HEADER_OK.  On return, the application may
- * adjust the decompression parameters and then call jpeg_start_decompress.
- * (Or, if the application only wanted to determine the image parameters,
- * the data need not be decompressed.  In that case, call jpeg_abort or
- * jpeg_destroy to release any temporary space.)
- * If an abbreviated (tables only) datastream is presented, the routine will
- * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
- * re-use the JPEG object to read the abbreviated image datastream(s).
- * It is unnecessary (but OK) to call jpeg_abort in this case.
- * The JPEG_SUSPENDED return code only occurs if the data source module
- * requests suspension of the decompressor.  In this case the application
- * should load more source data and then re-call jpeg_read_header to resume
- * processing.
- * If a non-suspending data source is used and require_image is TRUE, then the
- * return code need not be inspected since only JPEG_HEADER_OK is possible.
- *
- * This routine is now just a front end to jpeg_consume_input, with some
- * extra error checking.
- */
-
-GLOBAL(int)
-jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
-{
-  int retcode;
-
-  if (cinfo->global_state != DSTATE_START &&
-      cinfo->global_state != DSTATE_INHEADER)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  retcode = jpeg_consume_input(cinfo);
-
-  switch (retcode) {
-  case JPEG_REACHED_SOS:
-    retcode = JPEG_HEADER_OK;
-    break;
-  case JPEG_REACHED_EOI:
-    if (require_image)		/* Complain if application wanted an image */
-      ERREXIT(cinfo, JERR_NO_IMAGE);
-    /* Reset to start state; it would be safer to require the application to
-     * call jpeg_abort, but we can't change it now for compatibility reasons.
-     * A side effect is to free any temporary memory (there shouldn't be any).
-     */
-    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
-    retcode = JPEG_HEADER_TABLES_ONLY;
-    break;
-  case JPEG_SUSPENDED:
-    /* no work */
-    break;
-  }
-
-  return retcode;
-}
-
-
-/*
- * Consume data in advance of what the decompressor requires.
- * This can be called at any time once the decompressor object has
- * been created and a data source has been set up.
- *
- * This routine is essentially a state machine that handles a couple
- * of critical state-transition actions, namely initial setup and
- * transition from header scanning to ready-for-start_decompress.
- * All the actual input is done via the input controller's consume_input
- * method.
- */
-
-GLOBAL(int)
-jpeg_consume_input (j_decompress_ptr cinfo)
-{
-  int retcode = JPEG_SUSPENDED;
-
-  /* NB: every possible DSTATE value should be listed in this switch */
-  switch (cinfo->global_state) {
-  case DSTATE_START:
-    /* Start-of-datastream actions: reset appropriate modules */
-    (*cinfo->inputctl->reset_input_controller) (cinfo);
-    /* Initialize application's data source module */
-    (*cinfo->src->init_source) (cinfo);
-    cinfo->global_state = DSTATE_INHEADER;
-    /*FALLTHROUGH*/
-  case DSTATE_INHEADER:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
-      /* Set up default parameters based on header data */
-      default_decompress_parms(cinfo);
-      /* Set global state: ready for start_decompress */
-      cinfo->global_state = DSTATE_READY;
-    }
-    break;
-  case DSTATE_READY:
-    /* Can't advance past first SOS until start_decompress is called */
-    retcode = JPEG_REACHED_SOS;
-    break;
-  case DSTATE_PRELOAD:
-  case DSTATE_PRESCAN:
-  case DSTATE_SCANNING:
-  case DSTATE_RAW_OK:
-  case DSTATE_BUFIMAGE:
-  case DSTATE_BUFPOST:
-  case DSTATE_STOPPING:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    break;
-  default:
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  return retcode;
-}
-
-
-/*
- * Have we finished reading the input file?
- */
-
-GLOBAL(boolean)
-jpeg_input_complete (j_decompress_ptr cinfo)
-{
-  /* Check for valid jpeg object */
-  if (cinfo->global_state < DSTATE_START ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->eoi_reached;
-}
-
-
-/*
- * Is there more than one scan?
- */
-
-GLOBAL(boolean)
-jpeg_has_multiple_scans (j_decompress_ptr cinfo)
-{
-  /* Only valid after jpeg_read_header completes */
-  if (cinfo->global_state < DSTATE_READY ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->has_multiple_scans;
-}
-
-
-/*
- * Finish JPEG decompression.
- *
- * This will normally just verify the file trailer and release temp storage.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_decompress (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
-    /* Terminate final pass of non-buffered mode */
-    if (cinfo->output_scanline < cinfo->output_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
-    /* Finishing after a buffered-image operation */
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state != DSTATE_STOPPING) {
-    /* STOPPING = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read until EOI */
-  while (! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  /* Do final cleanup */
-  (*cinfo->src->term_source) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-  return TRUE;
-}
diff --git a/dep/libjpeg/src/jdapistd.c b/dep/libjpeg/src/jdapistd.c
deleted file mode 100644
index 7f3a78b25..000000000
--- a/dep/libjpeg/src/jdapistd.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * jdapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-decompression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_decompress, it will end up linking in the entire decompressor.
- * We thus must separate this file from jdapimin.c to avoid linking the
- * whole decompression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Decompression initialization.
- * jpeg_read_header must be completed before calling this.
- *
- * If a multipass operating mode was selected, this will do all but the
- * last pass, and thus may take a great deal of time.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_start_decompress (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize master control, select active modules */
-    jinit_master_decompress(cinfo);
-    if (cinfo->buffered_image) {
-      /* No more work here; expecting jpeg_start_output next */
-      cinfo->global_state = DSTATE_BUFIMAGE;
-      return TRUE;
-    }
-    cinfo->global_state = DSTATE_PRELOAD;
-  }
-  if (cinfo->global_state == DSTATE_PRELOAD) {
-    /* If file has multiple scans, absorb them all into the coef buffer */
-    if (cinfo->inputctl->has_multiple_scans) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-      for (;;) {
-	int retcode;
-	/* Call progress monitor hook if present */
-	if (cinfo->progress != NULL)
-	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-	/* Absorb some more input */
-	retcode = (*cinfo->inputctl->consume_input) (cinfo);
-	if (retcode == JPEG_SUSPENDED)
-	  return FALSE;
-	if (retcode == JPEG_REACHED_EOI)
-	  break;
-	/* Advance progress counter if appropriate */
-	if (cinfo->progress != NULL &&
-	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	    /* jdmaster underestimated number of scans; ratchet up one scan */
-	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	  }
-	}
-      }
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-    }
-    cinfo->output_scan_number = cinfo->input_scan_number;
-  } else if (cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any dummy output passes, and set up for the final pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Set up for an output pass, and perform any dummy pass(es) needed.
- * Common subroutine for jpeg_start_decompress and jpeg_start_output.
- * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
- * Exit: If done, returns TRUE and sets global_state for proper output mode.
- *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
- */
-
-LOCAL(boolean)
-output_pass_setup (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state != DSTATE_PRESCAN) {
-    /* First call: do pass setup */
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-    cinfo->global_state = DSTATE_PRESCAN;
-  }
-  /* Loop over any required dummy passes */
-  while (cinfo->master->is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Crank through the dummy pass */
-    while (cinfo->output_scanline < cinfo->output_height) {
-      JDIMENSION last_scanline;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-	cinfo->progress->pass_limit = (long) cinfo->output_height;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* Process some data */
-      last_scanline = cinfo->output_scanline;
-      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
-				    &cinfo->output_scanline, (JDIMENSION) 0);
-      if (cinfo->output_scanline == last_scanline)
-	return FALSE;		/* No progress made, must suspend */
-    }
-    /* Finish up dummy pass, and set up for another one */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  }
-  /* Ready for application to drive output pass through
-   * jpeg_read_scanlines or jpeg_read_raw_data.
-   */
-  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
-  return TRUE;
-}
-
-
-/*
- * Read some scanlines of data from the JPEG decompressor.
- *
- * The return value will be the number of lines actually read.
- * This may be less than the number requested in several cases,
- * including bottom of image, data source suspension, and operating
- * modes that emit multiple scanlines at a time.
- *
- * Note: we warn about excess calls to jpeg_read_scanlines() since
- * this likely signals an application programmer error.  However,
- * an oversize buffer (max_lines > scanlines remaining) is not an error.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-		     JDIMENSION max_lines)
-{
-  JDIMENSION row_ctr;
-
-  if (cinfo->global_state != DSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Process some data */
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
-  cinfo->output_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to read raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
-		    JDIMENSION max_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != DSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Verify that at least one iMCU row can be returned. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
-  if (max_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Decompress directly into user's buffer. */
-  if (! (*cinfo->coef->decompress_data) (cinfo, data))
-    return 0;			/* suspension forced, can do nothing more */
-
-  /* OK, we processed one iMCU row. */
-  cinfo->output_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
-
-
-/* Additional entry points for buffered-image mode. */
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Initialize for an output pass in buffered-image mode.
- */
-
-GLOBAL(boolean)
-jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
-{
-  if (cinfo->global_state != DSTATE_BUFIMAGE &&
-      cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Limit scan number to valid range */
-  if (scan_number <= 0)
-    scan_number = 1;
-  if (cinfo->inputctl->eoi_reached &&
-      scan_number > cinfo->input_scan_number)
-    scan_number = cinfo->input_scan_number;
-  cinfo->output_scan_number = scan_number;
-  /* Perform any dummy output passes, and set up for the real pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Finish up after an output pass in buffered-image mode.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_output (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
-    /* Terminate this pass. */
-    /* We do not require the whole pass to have been completed. */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_BUFPOST;
-  } else if (cinfo->global_state != DSTATE_BUFPOST) {
-    /* BUFPOST = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read markers looking for SOS or EOI */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  cinfo->global_state = DSTATE_BUFIMAGE;
-  return TRUE;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/dep/libjpeg/src/jdarith.c b/dep/libjpeg/src/jdarith.c
deleted file mode 100644
index 2c9abe23f..000000000
--- a/dep/libjpeg/src/jdarith.c
+++ /dev/null
@@ -1,796 +0,0 @@
-/*
- * jdarith.c
- *
- * Developed 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy decoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy decoder object for arithmetic decoding. */
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  INT32 c;       /* C register, base of coding interval + input bit buffer */
-  INT32 a;               /* A register, normalized size of coding interval */
-  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
-                                                         /* init: ct = -16 */
-                                                         /* run: ct = 0..7 */
-                                                         /* error: ct = -1 */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_decoder;
-
-typedef arith_entropy_decoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-
-LOCAL(int)
-get_byte (j_decompress_ptr cinfo)
-/* Read next input byte; we do not support suspension in this module. */
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-
-  if (src->bytes_in_buffer == 0)
-    if (! (*src->fill_input_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  src->bytes_in_buffer--;
-  return GETJOCTET(*src->next_input_byte++);
-}
-
-
-/*
- * The core arithmetic decoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Return value is 0 or 1 (binary decision).
- *
- * Note: I've changed the handling of the code base & bit
- * buffer register C compared to other implementations
- * based on the standards layout & procedures.
- * While it also contains both the actual base of the
- * coding interval (16 bits) and the next-bits buffer,
- * the cut-point between these two parts is floating
- * (instead of fixed) with the bit shift counter CT.
- * Thus, we also need only one (variable instead of
- * fixed size) shift for the LPS/MPS decision, and
- * we can do away with any renormalization update
- * of C (except for new data insertion, of course).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(int)
-arith_decode (j_decompress_ptr cinfo, unsigned char *st)
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv, data;
-
-  /* Renormalization & data input per section D.2.6 */
-  while (e->a < 0x8000L) {
-    if (--e->ct < 0) {
-      /* Need to fetch next data byte */
-      if (cinfo->unread_marker)
-	data = 0;		/* stuff zero data */
-      else {
-	data = get_byte(cinfo);	/* read next input byte */
-	if (data == 0xFF) {	/* zero stuff or marker code */
-	  do data = get_byte(cinfo);
-	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
-	  if (data == 0)
-	    data = 0xFF;	/* discard stuffed zero byte */
-	  else {
-	    /* Note: Different from the Huffman decoder, hitting
-	     * a marker while processing the compressed data
-	     * segment is legal in arithmetic coding.
-	     * The convention is to supply zero data
-	     * then until decoding is complete.
-	     */
-	    cinfo->unread_marker = data;
-	    data = 0;
-	  }
-	}
-      }
-      e->c = (e->c << 8) | data; /* insert data into C register */
-      if ((e->ct += 8) < 0)	 /* update bit shift counter */
-	/* Need more initial bytes */
-	if (++e->ct == 0)
-	  /* Got 2 initial bytes -> re-init A and exit loop */
-	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
-    }
-    e->a <<= 1;
-  }
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
-  temp = e->a - qe;
-  e->a = temp;
-  temp <<= e->ct;
-  if (e->c >= temp) {
-    e->c -= temp;
-    /* Conditional LPS (less probable symbol) exchange */
-    if (e->a < qe) {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    } else {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    }
-  } else if (e->a < 0x8000L) {
-    /* Conditional MPS (more probable symbol) exchange */
-    if (e->a < qe) {
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    } else {
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    }
-  }
-
-  return sv >> 7;
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- */
-
-LOCAL(void)
-process_restart (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Arithmetic MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * arithmetic-compressed coefficients.
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign;
-  int v, m;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
-    (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-  /* Figure F.20: Decode_AC_coefficients */
-  k = cinfo->Ss - 1;
-  do {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (arith_decode(cinfo, st)) break;		/* EOB flag */
-    for (;;) {
-      k++;
-      if (arith_decode(cinfo, st + 1)) break;
-      st += 3;
-      if (k >= cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-    /* Figure F.21: Decoding nonzero value v */
-    /* Figure F.22: Decoding the sign of v */
-    sign = arith_decode(cinfo, entropy->fixed_bin);
-    st += 2;
-    /* Figure F.23: Decoding the magnitude category of v */
-    if ((m = arith_decode(cinfo, st)) != 0) {
-      if (arith_decode(cinfo, st)) {
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-    }
-    v = m;
-    /* Figure F.24: Decoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      if (arith_decode(cinfo, st)) v |= m;
-    v += 1; if (sign) v = -v;
-    /* Scale and output coefficient in natural (dezigzagged) order */
-    (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al);
-  } while (k < cinfo->Se);
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  JCOEF p1;
-  int blkn;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    if (arith_decode(cinfo, st))
-      MCU_data[blkn][0][0] |= p1;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  unsigned char *st;
-  int tbl, k, kex;
-  JCOEF p1, m1;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-  m1 = -p1;			/* -1 in the bit position being coded */
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  kex = cinfo->Se;
-  do {
-    if ((*block)[natural_order[kex]]) break;
-  } while (--kex);
-
-  k = cinfo->Ss - 1;
-  do {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (k >= kex)
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-    for (;;) {
-      thiscoef = *block + natural_order[++k];
-      if (*thiscoef) {				/* previously nonzero coef */
-	if (arith_decode(cinfo, st + 2)) {
-	  if (*thiscoef < 0)
-	    *thiscoef += m1;
-	  else
-	    *thiscoef += p1;
-	}
-	break;
-      }
-      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
-	if (arith_decode(cinfo, entropy->fixed_bin))
-	  *thiscoef = m1;
-	else
-	  *thiscoef = p1;
-	break;
-      }
-      st += 3;
-      if (k >= cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-  } while (k < cinfo->Se);
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];
-
-    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-    if (cinfo->lim_Se == 0) continue;
-    tbl = compptr->ac_tbl_no;
-    k = 0;
-
-    /* Figure F.20: Decode_AC_coefficients */
-    do {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-      for (;;) {
-	k++;
-	if (arith_decode(cinfo, st + 1)) break;
-	st += 3;
-	if (k >= cinfo->lim_Se) {
-	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	  entropy->ct = -1;			/* spectral overflow */
-	  return TRUE;
-	}
-      }
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, entropy->fixed_bin);
-      st += 2;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	if (arith_decode(cinfo, st)) {
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (arith_decode(cinfo, st)) {
-	    if ((m <<= 1) == (int) 0x8000U) {
-	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	      entropy->ct = -1;			/* magnitude overflow */
-	      return TRUE;
-	    }
-	    st += 1;
-	  }
-	}
-      }
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      (*block)[natural_order[k]] = (JCOEF) v;
-    } while (k < cinfo->lim_Se);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-    /* Select MCU decoding routine */
-    entropy->pub.decode_mcu = decode_mcu;
-  }
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-    }
-  }
-
-  /* Initialize arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Finish up at the end of an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy decoding.
- */
-
-GLOBAL(void)
-jinit_arith_decoder (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_decoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass;
-  entropy->pub.finish_pass = finish_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-  }
-}
diff --git a/dep/libjpeg/src/jdatadst.c b/dep/libjpeg/src/jdatadst.c
deleted file mode 100644
index b3b4798ea..000000000
--- a/dep/libjpeg/src/jdatadst.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * jdatadst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains compression data destination routines for the case of
- * emitting JPEG data to memory or to a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different destination manager.
- * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
- * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/* Expanded data destination object for stdio output */
-
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  FILE * outfile;		/* target stream */
-  JOCTET buffer[OUTPUT_BUF_SIZE]; /* output buffer */
-} my_destination_mgr;
-
-typedef my_destination_mgr * my_dest_ptr;
-
-
-/* Expanded data destination object for memory output */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  unsigned char ** outbuffer;	/* target buffer */
-  size_t * outsize;
-  unsigned char * newbuffer;	/* newly allocated buffer */
-  JOCTET * buffer;		/* start of buffer */
-  size_t bufsize;
-} my_mem_destination_mgr;
-
-typedef my_mem_destination_mgr * my_mem_dest_ptr;
-
-
-/*
- * Initialize destination --- called by jpeg_start_compress
- * before any data is actually written.
- */
-
-METHODDEF(void)
-init_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-}
-
-METHODDEF(void)
-init_mem_destination (j_compress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Empty the output buffer --- called whenever buffer fills up.
- *
- * In typical applications, this should write the entire output buffer
- * (ignoring the current state of next_output_byte & free_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been dumped.
- *
- * In applications that need to be able to suspend compression due to output
- * overrun, a FALSE return indicates that the buffer cannot be emptied now.
- * In this situation, the compressor will return to its caller (possibly with
- * an indication that it has not accepted all the supplied scanlines).  The
- * application should resume compression after it has made more room in the
- * output buffer.  Note that there are substantial restrictions on the use of
- * suspension --- see the documentation.
- *
- * When suspending, the compressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_output_byte & free_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point will be regenerated after resumption, so do not
- * write it out when emptying the buffer externally.
- */
-
-METHODDEF(boolean)
-empty_output_buffer (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
-      (size_t) OUTPUT_BUF_SIZE)
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-empty_mem_output_buffer (j_compress_ptr cinfo)
-{
-  size_t nextsize;
-  JOCTET * nextbuffer;
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  /* Try to allocate new buffer with double size */
-  nextsize = dest->bufsize * 2;
-  nextbuffer = (JOCTET *) malloc(nextsize);
-
-  if (nextbuffer == NULL)
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11);
-
-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
-
-  if (dest->newbuffer != NULL)
-    free(dest->newbuffer);
-
-  dest->newbuffer = nextbuffer;
-
-  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
-  dest->pub.free_in_buffer = dest->bufsize;
-
-  dest->buffer = nextbuffer;
-  dest->bufsize = nextsize;
-
-  return TRUE;
-}
-
-
-/*
- * Terminate destination --- called by jpeg_finish_compress
- * after all data has been written.  Usually needs to flush buffer.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
-
-  /* Write any data remaining in the buffer */
-  if (datacount > 0) {
-    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
-      ERREXIT(cinfo, JERR_FILE_WRITE);
-  }
-  JFFLUSH(dest->outfile);
-  /* Make sure we wrote the output file OK */
-  if (JFERROR(dest->outfile))
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-}
-
-METHODDEF(void)
-term_mem_destination (j_compress_ptr cinfo)
-{
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  *dest->outbuffer = dest->buffer;
-  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
-}
-
-
-/*
- * Prepare for output to a stdio stream.
- * The caller must have already opened the stream,
- * and is responsible for closing it after finishing compression.
- */
-
-GLOBAL(void)
-jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
-{
-  my_dest_ptr dest;
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same file without re-executing jpeg_stdio_dest.
-   * This makes it dangerous to use this manager and a different destination
-   * manager serially with the same JPEG object, because their private object
-   * sizes may be different.  Caveat programmer.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_destination_mgr));
-  }
-
-  dest = (my_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_destination;
-  dest->pub.empty_output_buffer = empty_output_buffer;
-  dest->pub.term_destination = term_destination;
-  dest->outfile = outfile;
-}
-
-
-/*
- * Prepare for output to a memory buffer.
- * The caller may supply an own initial buffer with appropriate size.
- * Otherwise, or when the actual data output exceeds the given size,
- * the library adapts the buffer size as necessary.
- * The standard library functions malloc/free are used for allocating
- * larger memory, so the buffer is available to the application after
- * finishing compression, and then the application is responsible for
- * freeing the requested memory.
- * Note:  An initial buffer supplied by the caller is expected to be
- * managed by the application.  The library does not free such buffer
- * when allocating a larger buffer.
- */
-
-GLOBAL(void)
-jpeg_mem_dest (j_compress_ptr cinfo,
-	       unsigned char ** outbuffer, size_t * outsize)
-{
-  my_mem_dest_ptr dest;
-
-  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same buffer without re-executing jpeg_mem_dest.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_mem_destination_mgr));
-  }
-
-  dest = (my_mem_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_mem_destination;
-  dest->pub.empty_output_buffer = empty_mem_output_buffer;
-  dest->pub.term_destination = term_mem_destination;
-  dest->outbuffer = outbuffer;
-  dest->outsize = outsize;
-  dest->newbuffer = NULL;
-
-  if (*outbuffer == NULL || *outsize == 0) {
-    /* Allocate initial buffer */
-    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
-    if (dest->newbuffer == NULL)
-      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
-    *outsize = OUTPUT_BUF_SIZE;
-  }
-
-  dest->pub.next_output_byte = dest->buffer = *outbuffer;
-  dest->pub.free_in_buffer = dest->bufsize = *outsize;
-}
diff --git a/dep/libjpeg/src/jdatasrc.c b/dep/libjpeg/src/jdatasrc.c
deleted file mode 100644
index fd7a1a594..000000000
--- a/dep/libjpeg/src/jdatasrc.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * jdatasrc.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains decompression data source routines for the case of
- * reading JPEG data from memory or from a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different source manager.
- * IMPORTANT: we assume that fread() will correctly transcribe an array of
- * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-
-/* Expanded data source object for stdio input */
-
-#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
-
-typedef struct {
-  struct jpeg_source_mgr pub;	/* public fields */
-
-  FILE * infile;		/* source stream */
-  JOCTET buffer[INPUT_BUF_SIZE]; /* input buffer */
-  boolean start_of_file;	/* have we gotten any data yet? */
-} my_source_mgr;
-
-typedef my_source_mgr * my_src_ptr;
-
-
-/*
- * Initialize source --- called by jpeg_read_header
- * before any data is actually read.
- */
-
-METHODDEF(void)
-init_source (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-
-  /* We reset the empty-input-file flag for each image,
-   * but we don't clear the input buffer.
-   * This is correct behavior for reading a series of images from one source.
-   */
-  src->start_of_file = TRUE;
-}
-
-METHODDEF(void)
-init_mem_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Fill the input buffer --- called whenever buffer is emptied.
- *
- * In typical applications, this should read fresh data into the buffer
- * (ignoring the current state of next_input_byte & bytes_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been reloaded.  It is not necessary to
- * fill the buffer entirely, only to obtain at least one more byte.
- *
- * There is no such thing as an EOF return.  If the end of the file has been
- * reached, the routine has a choice of ERREXIT() or inserting fake data into
- * the buffer.  In most cases, generating a warning message and inserting a
- * fake EOI marker is the best course of action --- this will allow the
- * decompressor to output however much of the image is there.  However,
- * the resulting error message is misleading if the real problem is an empty
- * input file, so we handle that case specially.
- *
- * In applications that need to be able to suspend compression due to input
- * not being available yet, a FALSE return indicates that no more data can be
- * obtained right now, but more may be forthcoming later.  In this situation,
- * the decompressor will return to its caller (with an indication of the
- * number of scanlines it has read, if any).  The application should resume
- * decompression after it has loaded more data into the input buffer.  Note
- * that there are substantial restrictions on the use of suspension --- see
- * the documentation.
- *
- * When suspending, the decompressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point must be rescanned after resumption, so move it to
- * the front of the buffer rather than discarding it.
- */
-
-METHODDEF(boolean)
-fill_input_buffer (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-  size_t nbytes;
-
-  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
-
-  if (nbytes <= 0) {
-    if (src->start_of_file)	/* Treat empty input file as fatal error */
-      ERREXIT(cinfo, JERR_INPUT_EMPTY);
-    WARNMS(cinfo, JWRN_JPEG_EOF);
-    /* Insert a fake EOI marker */
-    src->buffer[0] = (JOCTET) 0xFF;
-    src->buffer[1] = (JOCTET) JPEG_EOI;
-    nbytes = 2;
-  }
-
-  src->pub.next_input_byte = src->buffer;
-  src->pub.bytes_in_buffer = nbytes;
-  src->start_of_file = FALSE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-fill_mem_input_buffer (j_decompress_ptr cinfo)
-{
-  static const JOCTET mybuffer[4] = {
-    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
-  };
-
-  /* The whole JPEG data is expected to reside in the supplied memory
-   * buffer, so any request for more data beyond the given buffer size
-   * is treated as an error.
-   */
-  WARNMS(cinfo, JWRN_JPEG_EOF);
-
-  /* Insert a fake EOI marker */
-
-  cinfo->src->next_input_byte = mybuffer;
-  cinfo->src->bytes_in_buffer = 2;
-
-  return TRUE;
-}
-
-
-/*
- * Skip data --- used to skip over a potentially large amount of
- * uninteresting data (such as an APPn marker).
- *
- * Writers of suspendable-input applications must note that skip_input_data
- * is not granted the right to give a suspension return.  If the skip extends
- * beyond the data currently in the buffer, the buffer can be marked empty so
- * that the next read will cause a fill_input_buffer call that can suspend.
- * Arranging for additional bytes to be discarded before reloading the input
- * buffer is the application writer's problem.
- */
-
-METHODDEF(void)
-skip_input_data (j_decompress_ptr cinfo, long num_bytes)
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-  size_t nbytes;
-
-  /* Just a dumb implementation for now.  Could use fseek() except
-   * it doesn't work on pipes.  Not clear that being smart is worth
-   * any trouble anyway --- large skips are infrequent.
-   */
-  if (num_bytes > 0) {
-    nbytes = (size_t) num_bytes;
-    while (nbytes > src->bytes_in_buffer) {
-      nbytes -= src->bytes_in_buffer;
-      (void) (*src->fill_input_buffer) (cinfo);
-      /* note we assume that fill_input_buffer will never return FALSE,
-       * so suspension need not be handled.
-       */
-    }
-    src->next_input_byte += nbytes;
-    src->bytes_in_buffer -= nbytes;
-  }
-}
-
-
-/*
- * An additional method that can be provided by data source modules is the
- * resync_to_restart method for error recovery in the presence of RST markers.
- * For the moment, this source module just uses the default resync method
- * provided by the JPEG library.  That method assumes that no backtracking
- * is possible.
- */
-
-
-/*
- * Terminate source --- called by jpeg_finish_decompress
- * after all data has been read.  Often a no-op.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Prepare for input from a stdio stream.
- * The caller must have already opened the stream,
- * and is responsible for closing it after finishing decompression.
- */
-
-GLOBAL(void)
-jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
-{
-  my_src_ptr src;
-
-  /* The source object including the input buffer is made permanent so that
-   * a series of JPEG images can be read from the same file by calling
-   * jpeg_stdio_src only before the first one.  (If we discarded the buffer
-   * at the end of one image, we'd likely lose the start of the next one.)
-   * This makes it unsafe to use this manager and a different source
-   * manager serially with the same JPEG object.  Caveat programmer.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_source_mgr));
-  }
-
-  src = (my_src_ptr) cinfo->src;
-  src->pub.init_source = init_source;
-  src->pub.fill_input_buffer = fill_input_buffer;
-  src->pub.skip_input_data = skip_input_data;
-  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->pub.term_source = term_source;
-  src->infile = infile;
-  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
-  src->pub.next_input_byte = NULL; /* until buffer loaded */
-}
-
-
-/*
- * Prepare for input from a supplied memory buffer.
- * The buffer must contain the whole JPEG data.
- */
-
-GLOBAL(void)
-jpeg_mem_src (j_decompress_ptr cinfo,
-	      const unsigned char * inbuffer, size_t insize)
-{
-  struct jpeg_source_mgr * src;
-
-  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
-    ERREXIT(cinfo, JERR_INPUT_EMPTY);
-
-  /* The source object is made permanent so that a series of JPEG images
-   * can be read from the same buffer by calling jpeg_mem_src only before
-   * the first one.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(struct jpeg_source_mgr));
-  }
-
-  src = cinfo->src;
-  src->init_source = init_mem_source;
-  src->fill_input_buffer = fill_mem_input_buffer;
-  src->skip_input_data = skip_input_data;
-  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->term_source = term_source;
-  src->bytes_in_buffer = insize;
-  src->next_input_byte = (const JOCTET *) inbuffer;
-}
diff --git a/dep/libjpeg/src/jdcoefct.c b/dep/libjpeg/src/jdcoefct.c
deleted file mode 100644
index 79ba42014..000000000
--- a/dep/libjpeg/src/jdcoefct.c
+++ /dev/null
@@ -1,744 +0,0 @@
-/*
- * jdcoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for decompression.
- * This controller is the top level of the JPEG decompressor proper.
- * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
- *
- * In buffered-image mode, this controller is the interface between
- * input-oriented processing and output-oriented processing.
- * Also, the input side (only) is used when reading a file for transcoding.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Block smoothing is only applicable for progressive JPEG, so: */
-#ifndef D_PROGRESSIVE_SUPPORTED
-#undef BLOCK_SMOOTHING_SUPPORTED
-#endif
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_coef_controller pub; /* public fields */
-
-  /* These variables keep track of the current location of the input side. */
-  /* cinfo->input_iMCU_row is also used for this. */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* The output side's location is represented by cinfo->output_iMCU_row. */
-
-  /* In single-pass modes, it's sufficient to buffer just one MCU.
-   * We append a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
-   * and let the entropy decoder write into that workspace each time.
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays; it is used only by the input side.
-   */
-  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-#endif
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  /* When doing block smoothing, we latch coefficient Al values here */
-  int * coef_bits_latch;
-#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
-#endif
-
-  /* Workspace for single-pass modes (omitted otherwise). */
-  JBLOCK blk_buffer[D_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-/* Forward declarations */
-METHODDEF(int) decompress_onepass
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-METHODDEF(int) decompress_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
-METHODDEF(int) decompress_smooth_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_decompress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row (input side) */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for an input processing pass.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  cinfo->input_iMCU_row = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Initialize for an output processing pass.
- */
-
-METHODDEF(void)
-start_output_pass (j_decompress_ptr cinfo)
-{
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* If multipass, check to see whether to use block smoothing on this pass */
-  if (coef->pub.coef_arrays != NULL) {
-    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
-      coef->pub.decompress_data = decompress_smooth_data;
-    else
-      coef->pub.decompress_data = decompress_data;
-  }
-#endif
-  cinfo->output_iMCU_row = 0;
-}
-
-
-/*
- * Decompress and return some data in the single-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Input and output must run in lockstep since we have only a one-MCU buffer.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(int)
-decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int ci, xindex, yindex, yoffset, useful_width;
-  JBLOCKROW blkp;
-  JSAMPARRAY output_ptr;
-  JDIMENSION start_col, output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Loop to process as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
-      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
-      if (cinfo->lim_Se)	/* can bypass in DC only case */
-	MEMZERO(blkp, cinfo->blocks_in_MCU * SIZEOF(JBLOCK));
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-      /* Determine where data should go in output_buf and do the IDCT thing.
-       * We skip dummy blocks at the right and bottom edges (but blkp gets
-       * incremented past them!).
-       */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	/* Don't bother to IDCT an uninteresting component. */
-	if (! compptr->component_needed) {
-	  blkp += compptr->MCU_blocks;
-	  continue;
-	}
-	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-	output_ptr = output_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_v_scaled_size;
-	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						    : compptr->last_col_width;
-	start_col = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (cinfo->input_iMCU_row < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    output_col = start_col;
-	    for (xindex = 0; xindex < useful_width; xindex++) {
-	      (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) (blkp + xindex),
-			      output_ptr, output_col);
-	      output_col += compptr->DCT_h_scaled_size;
-	    }
-	    output_ptr += compptr->DCT_v_scaled_size;
-	  }
-	  blkp += compptr->MCU_width;
-	}
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  cinfo->output_iMCU_row++;
-  if (++(cinfo->input_iMCU_row) <= last_iMCU_row) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Dummy consume-input routine for single-pass operation.
- */
-
-METHODDEF(int)
-dummy_consume_data (j_decompress_ptr cinfo)
-{
-  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Consume input data and store it in the full-image coefficient buffer.
- * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
- * ie, v_samp_factor block rows for each component in the scan.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- */
-
-METHODDEF(int)
-consume_data (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY blkp;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       cinfo->input_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Note: entropy decoder expects buffer to be zeroed,
-     * but this is handled automatically by the memory manager
-     * because we requested a pre-zeroed array.
-     */
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	  xindex = compptr->MCU_width;
-	  do {
-	    *blkp++ = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to fetch the MCU. */
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Decompress and return some data in the multi-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image.
- */
-
-METHODDEF(int)
-decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num;
-  int ci, block_row, block_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number < cinfo->output_scan_number ||
-	 (cinfo->input_scan_number == cinfo->output_scan_number &&
-	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       cinfo->output_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      output_col = 0;
-      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
-			output_ptr, output_col);
-	buffer_ptr++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-
-/*
- * This code applies interblock smoothing as described by section K.8
- * of the JPEG standard: the first 5 AC coefficients are estimated from
- * the DC values of a DCT block and its 8 neighboring blocks.
- * We apply smoothing only for progressive JPEG decoding, and only if
- * the coefficients it can estimate are not yet known to full precision.
- */
-
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
-#define Q01_POS  1
-#define Q10_POS  8
-#define Q20_POS  16
-#define Q11_POS  9
-#define Q02_POS  2
-
-/*
- * Determine whether block smoothing is applicable and safe.
- * We also latch the current states of the coef_bits[] entries for the
- * AC coefficients; otherwise, if the input side of the decompressor
- * advances into a new scan, we might think the coefficients are known
- * more accurately than they really are.
- */
-
-LOCAL(boolean)
-smoothing_ok (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  boolean smoothing_useful = FALSE;
-  int ci, coefi;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtable;
-  int * coef_bits;
-  int * coef_bits_latch;
-
-  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
-    return FALSE;
-
-  /* Allocate latch area if not already done */
-  if (coef->coef_bits_latch == NULL)
-    coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * (SAVED_COEFS * SIZEOF(int)));
-  coef_bits_latch = coef->coef_bits_latch;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* All components' quantization values must already be latched. */
-    if ((qtable = compptr->quant_table) == NULL)
-      return FALSE;
-    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
-    if (qtable->quantval[0] == 0 ||
-	qtable->quantval[Q01_POS] == 0 ||
-	qtable->quantval[Q10_POS] == 0 ||
-	qtable->quantval[Q20_POS] == 0 ||
-	qtable->quantval[Q11_POS] == 0 ||
-	qtable->quantval[Q02_POS] == 0)
-      return FALSE;
-    /* DC values must be at least partly known for all components. */
-    coef_bits = cinfo->coef_bits[ci];
-    if (coef_bits[0] < 0)
-      return FALSE;
-    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
-    for (coefi = 1; coefi <= 5; coefi++) {
-      coef_bits_latch[coefi] = coef_bits[coefi];
-      if (coef_bits[coefi] != 0)
-	smoothing_useful = TRUE;
-    }
-    coef_bits_latch += SAVED_COEFS;
-  }
-
-  return smoothing_useful;
-}
-
-
-/*
- * Variant of decompress_data for use when doing block smoothing.
- */
-
-METHODDEF(int)
-decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num, last_block_column;
-  int ci, block_row, block_rows, access_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-  boolean first_row, last_row;
-  JBLOCK workspace;
-  int *coef_bits;
-  JQUANT_TBL *quanttbl;
-  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
-  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
-  int Al, pred;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if (cinfo->input_scan_number == cinfo->output_scan_number) {
-      /* If input is working on current scan, we ordinarily want it to
-       * have completed the current row.  But if input scan is DC,
-       * we want it to keep one row ahead so that next block row's DC
-       * values are up to date.
-       */
-      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
-      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
-	break;
-    }
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row) {
-      block_rows = compptr->v_samp_factor;
-      access_rows = block_rows * 2; /* this and next iMCU row */
-      last_row = FALSE;
-    } else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-      access_rows = block_rows; /* this iMCU row only */
-      last_row = TRUE;
-    }
-    /* Align the virtual buffer for this component. */
-    if (cinfo->output_iMCU_row > 0) {
-      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
-	 (JDIMENSION) access_rows, FALSE);
-      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
-      first_row = FALSE;
-    } else {
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
-      first_row = TRUE;
-    }
-    /* Fetch component-dependent info */
-    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
-    quanttbl = compptr->quant_table;
-    Q00 = quanttbl->quantval[0];
-    Q01 = quanttbl->quantval[Q01_POS];
-    Q10 = quanttbl->quantval[Q10_POS];
-    Q20 = quanttbl->quantval[Q20_POS];
-    Q11 = quanttbl->quantval[Q11_POS];
-    Q02 = quanttbl->quantval[Q02_POS];
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      if (first_row && block_row == 0)
-	prev_block_row = buffer_ptr;
-      else
-	prev_block_row = buffer[block_row-1];
-      if (last_row && block_row == block_rows-1)
-	next_block_row = buffer_ptr;
-      else
-	next_block_row = buffer[block_row+1];
-      /* We fetch the surrounding DC values using a sliding-register approach.
-       * Initialize all nine here so as to do the right thing on narrow pics.
-       */
-      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
-      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
-      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
-      output_col = 0;
-      last_block_column = compptr->width_in_blocks - 1;
-      for (block_num = 0; block_num <= last_block_column; block_num++) {
-	/* Fetch current DCT block into workspace so we can modify it. */
-	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
-	/* Update DC values */
-	if (block_num < last_block_column) {
-	  DC3 = (int) prev_block_row[1][0];
-	  DC6 = (int) buffer_ptr[1][0];
-	  DC9 = (int) next_block_row[1][0];
-	}
-	/* Compute coefficient estimates per K.8.
-	 * An estimate is applied only if coefficient is still zero,
-	 * and is not known to be fully accurate.
-	 */
-	/* AC01 */
-	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
-	  num = 36 * Q00 * (DC4 - DC6);
-	  if (num >= 0) {
-	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[1] = (JCOEF) pred;
-	}
-	/* AC10 */
-	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
-	  num = 36 * Q00 * (DC2 - DC8);
-	  if (num >= 0) {
-	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[8] = (JCOEF) pred;
-	}
-	/* AC20 */
-	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
-	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[16] = (JCOEF) pred;
-	}
-	/* AC11 */
-	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
-	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
-	  if (num >= 0) {
-	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[9] = (JCOEF) pred;
-	}
-	/* AC02 */
-	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
-	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[2] = (JCOEF) pred;
-	}
-	/* OK, do the IDCT */
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
-			output_ptr, output_col);
-	/* Advance for next column */
-	DC1 = DC2; DC2 = DC3;
-	DC4 = DC5; DC5 = DC6;
-	DC7 = DC8; DC8 = DC9;
-	buffer_ptr++, prev_block_row++, next_block_row++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* BLOCK_SMOOTHING_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  if (need_full_buffer) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    /* Note we ask for a pre-zeroed array. */
-    int ci, access_rows;
-    jpeg_component_info *compptr;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      access_rows = compptr->v_samp_factor;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-      /* If block smoothing could be used, need a bigger window */
-      if (cinfo->progressive_mode)
-	access_rows *= 3;
-#endif
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) access_rows);
-    }
-    coef->pub.consume_data = consume_data;
-    coef->pub.decompress_data = decompress_data;
-    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKARRAY blkp;
-    JBLOCKROW buffer_ptr;
-    int bi;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-    buffer_ptr = coef->blk_buffer;
-    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
-      MEMZERO(buffer_ptr, SIZEOF(coef->blk_buffer));
-    blkp = coef->MCU_buffer;
-    bi = D_MAX_BLOCKS_IN_MCU;
-    do {
-      *blkp++ = buffer_ptr++;
-    } while (--bi);
-    coef->pub.consume_data = dummy_consume_data;
-    coef->pub.decompress_data = decompress_onepass;
-    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
-  }
-
-  coef->pub.start_input_pass = start_input_pass;
-  coef->pub.start_output_pass = start_output_pass;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  coef->coef_bits_latch = NULL;
-#endif
-  cinfo->coef = &coef->pub;
-}
diff --git a/dep/libjpeg/src/jdcolor.c b/dep/libjpeg/src/jdcolor.c
deleted file mode 100644
index 6b40fb534..000000000
--- a/dep/libjpeg/src/jdcolor.c
+++ /dev/null
@@ -1,769 +0,0 @@
-/*
- * jdcolor.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains output colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-#if RANGE_BITS < 2
-  /* Deliberate syntax err */
-  Sorry, this code requires 2 or more range extension bits.
-#endif
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_deconverter pub; /* public fields */
-
-  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* Private state for RGB->Y conversion */
-  INT32 * R_y_tab;		/* => table for R to Y conversion */
-  INT32 * G_y_tab;		/* => table for G to Y conversion */
-  INT32 * B_y_tab;		/* => table for B to Y conversion */
-} my_color_deconverter;
-
-typedef my_color_deconverter * my_cconvert_ptr;
-
-
-/***************  YCbCr -> RGB conversion: most common case **************/
-/*************** BG_YCC -> RGB conversion: less common case **************/
-/***************    RGB -> Y   conversion: less common case **************/
-
-/*
- * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
- * previously known as Recommendation CCIR 601-1, except that Cb and Cr
- * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
- * sYCC (standard luma-chroma-chroma color space with extended gamut)
- * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
- * bg-sRGB and bg-sYCC (big gamut standard color spaces)
- * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
- * Note that the derived conversion coefficients given in some of these
- * documents are imprecise.  The general conversion equations are
- *
- *	R = Y + K * (1 - Kr) * Cr
- *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
- *	B = Y + K * (1 - Kb) * Cb
- *
- *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
- *
- * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
- * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
- * the conversion equations to be implemented are therefore
- *
- *	R = Y + 1.402 * Cr
- *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
- *	B = Y + 1.772 * Cb
- *
- *	Y = 0.299 * R + 0.587 * G + 0.114 * B
- *
- * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
- * For bg-sYCC, with K = 4, the equations are
- *
- *	R = Y + 2.804 * Cr
- *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
- *	B = Y + 3.544 * Cb
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- * Notice that Y, being an integral input, does not contribute any fraction
- * so it need not participate in the rounding.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times Cb and Cr for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 9-bit to 12-bit samples it is still acceptable.  It's not very
- * reasonable for 16-bit samples, but if you want lossless storage
- * you shouldn't be changing colorspace anyway.
- * The Cr=>R and Cb=>B values can be rounded to integers in advance;
- * the values for the G calculation are left scaled up,
- * since we must add them together before rounding.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-
-/*
- * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Normal case, sYCC */
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.402 * x */
-    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.772 * x */
-    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.714136286 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
-    /* Cb=>G value is scaled-up -0.344136286 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
-  }
-}
-
-
-LOCAL(void)
-build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Wide gamut case, bg-sYCC */
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 2.804 * x */
-    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 3.544 * x */
-    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -1.428272572 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
-    /* Cb=>G value is scaled-up -0.688272572 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- *
- * Note that we change from noninterleaved, one-plane-per-component format
- * to interleaved-pixel format.  The output buffer is therefore three times
- * as wide as the input buffer.
- *
- * A starting row offset is provided only for the input buffer.  The caller
- * can easily adjust the passed output_buf value to accommodate any row
- * offset required on that side.
- */
-
-METHODDEF(void)
-ycc_rgb_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses,
-       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
-       */
-      outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
-      outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-      outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/**************** Cases other than YCC -> RGB ****************/
-
-
-/*
- * Initialize for RGB->grayscale colorspace conversion.
- */
-
-LOCAL(void)
-build_rgb_y_table (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 i;
-
-  cconvert->R_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->G_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->B_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    cconvert->R_y_tab[i] = FIX(0.299) * i;
-    cconvert->G_y_tab[i] = FIX(0.587) * i;
-    cconvert->B_y_tab[i] = FIX(0.114) * i + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert RGB to grayscale.
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = Rytab[GETJSAMPLE(inptr0[col])];
-      y += Gytab[GETJSAMPLE(inptr1[col])];
-      y += Bytab[GETJSAMPLE(inptr2[col])];
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
- * (inverse color transform).
- * This can be seen as an adaption of the general YCbCr->RGB
- * conversion equation with Kr = Kb = 0, while replacing the
- * normalization by modulo calculation.
- */
-
-METHODDEF(void)
-rgb1_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  register int r, g, b;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      outptr[RGB_RED]   = (JSAMPLE) ((r + g - CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr[RGB_GREEN] = (JSAMPLE) g;
-      outptr[RGB_BLUE]  = (JSAMPLE) ((b + g - CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * [R-G,G,B-G] to grayscale conversion with modulo calculation
- * (inverse color transform).
- */
-
-METHODDEF(void)
-rgb1_gray_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      y  = Rytab[(r + g - CENTERJSAMPLE) & MAXJSAMPLE];
-      y += Gytab[g];
-      y += Bytab[(b + g - CENTERJSAMPLE) & MAXJSAMPLE];
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * No colorspace change, but conversion from separate-planes
- * to interleaved representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION input_row,
-	     JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED]   = inptr0[col];
-      outptr[RGB_GREEN] = inptr1[col];
-      outptr[RGB_BLUE]  = inptr2[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Color conversion for no colorspace change: just copy the data,
- * converting from separate-planes to interleaved representation.
- * Note: Omit uninteresting components in output buffer.
- */
-
-METHODDEF(void)
-null_convert (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION input_row,
-	      JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr;
-  register JDIMENSION count;
-  register int out_comps = cinfo->out_color_components;
-  JDIMENSION num_cols = cinfo->output_width;
-  JSAMPROW startptr;
-  int ci;
-  jpeg_component_info *compptr;
-
-  while (--num_rows >= 0) {
-    /* It seems fastest to make a separate pass for each component. */
-    startptr = *output_buf++;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (! compptr->component_needed)
-	continue;		/* skip uninteresting component */
-      inptr = input_buf[ci][input_row];
-      outptr = startptr++;
-      for (count = num_cols; count > 0; count--) {
-	*outptr = *inptr++;	/* don't need GETJSAMPLE() here */
-	outptr += out_comps;
-      }
-    }
-    input_row++;
-  }
-}
-
-
-/*
- * Color conversion for grayscale: just copy the data.
- * This also works for YCC -> grayscale conversion, in which
- * we just copy the Y (luminance) component and ignore chrominance.
- */
-
-METHODDEF(void)
-grayscale_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  jcopy_sample_rows(input_buf[0] + input_row, output_buf,
-		    num_rows, cinfo->output_width);
-}
-
-
-/*
- * Convert grayscale to RGB: just duplicate the graylevel three times.
- * This is provided to support applications that don't want to cope
- * with grayscale as a separate case.
- */
-
-METHODDEF(void)
-gray_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr = input_buf[0][input_row++];
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * This version handles Adobe-style YCCK->CMYK conversion,
- * where we convert YCbCr to R=1-C, G=1-M, and B=1-Y using the
- * same conversion as above, while passing K (black) unchanged.
- * We assume build_ycc_rgb_table has been called.
- */
-
-METHODDEF(void)
-ycck_cmyk_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    inptr3 = input_buf[3][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses,
-       * and for extended gamut encodings (sYCC).
-       */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
-      /* K passes through unchanged */
-      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
-      outptr += 4;
-    }
-  }
-}
-
-
-/*
- * Convert CMYK to YK part of YCCK for colorless output.
- * We assume build_rgb_y_table has been called.
- */
-
-METHODDEF(void)
-cmyk_yk_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    inptr3 = input_buf[3][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = Rytab[MAXJSAMPLE - GETJSAMPLE(inptr0[col])];
-      y += Gytab[MAXJSAMPLE - GETJSAMPLE(inptr1[col])];
-      y += Bytab[MAXJSAMPLE - GETJSAMPLE(inptr2[col])];
-      outptr[0] = (JSAMPLE) (y >> SCALEBITS);
-      /* K passes through unchanged */
-      outptr[1] = inptr3[col];	/* don't need GETJSAMPLE here */
-      outptr += 2;
-    }
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-start_pass_dcolor (j_decompress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for output colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_deconverter (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-  int ci, i;
-
-  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_deconverter));
-  cinfo->cconvert = &cconvert->pub;
-  cconvert->pub.start_pass = start_pass_dcolor;
-
-  /* Make sure num_components agrees with jpeg_color_space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-  case JCS_YCbCr:
-  case JCS_BG_RGB:
-  case JCS_BG_YCC:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->num_components < 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-  }
-
-  /* Support color transform only for RGB colorspaces */
-  if (cinfo->color_transform &&
-      cinfo->jpeg_color_space != JCS_RGB &&
-      cinfo->jpeg_color_space != JCS_BG_RGB)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  /* Set out_color_components and conversion method based on requested space.
-   * Also adjust the component_needed flags for any unused components,
-   * so that earlier pipeline stages can avoid useless computation.
-   */
-
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    switch (cinfo->jpeg_color_space) {
-    case JCS_GRAYSCALE:
-    case JCS_YCbCr:
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = grayscale_convert;
-      /* For color->grayscale conversion, only the Y (0) component is needed */
-      for (ci = 1; ci < cinfo->num_components; ci++)
-	cinfo->comp_info[ci].component_needed = FALSE;
-      break;
-    case JCS_RGB:
-      switch (cinfo->color_transform) {
-      case JCT_NONE:
-	cconvert->pub.color_convert = rgb_gray_convert;
-	break;
-      case JCT_SUBTRACT_GREEN:
-	cconvert->pub.color_convert = rgb1_gray_convert;
-	break;
-      default:
-	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-      }
-      build_rgb_y_table(cinfo);
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    switch (cinfo->jpeg_color_space) {
-    case JCS_GRAYSCALE:
-      cconvert->pub.color_convert = gray_rgb_convert;
-      break;
-    case JCS_YCbCr:
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_ycc_rgb_table(cinfo);
-      break;
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_bg_ycc_rgb_table(cinfo);
-      break;
-    case JCS_RGB:
-      switch (cinfo->color_transform) {
-      case JCT_NONE:
-	cconvert->pub.color_convert = rgb_convert;
-	break;
-      case JCT_SUBTRACT_GREEN:
-	cconvert->pub.color_convert = rgb1_rgb_convert;
-	break;
-      default:
-	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-      }
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_BG_RGB:
-    if (cinfo->jpeg_color_space != JCS_BG_RGB)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    switch (cinfo->color_transform) {
-    case JCT_NONE:
-      cconvert->pub.color_convert = rgb_convert;
-      break;
-    case JCT_SUBTRACT_GREEN:
-      cconvert->pub.color_convert = rgb1_rgb_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_CMYK:
-    if (cinfo->jpeg_color_space != JCS_YCCK)
-      goto def_label;
-    cinfo->out_color_components = 4;
-    cconvert->pub.color_convert = ycck_cmyk_convert;
-    build_ycc_rgb_table(cinfo);
-    break;
-
-  case JCS_YCCK:
-    if (cinfo->jpeg_color_space != JCS_CMYK ||
-	/* Support only YK part of YCCK for colorless output */
-	! cinfo->comp_info[0].component_needed ||
-	  cinfo->comp_info[1].component_needed ||
-	  cinfo->comp_info[2].component_needed ||
-	! cinfo->comp_info[3].component_needed)
-      goto def_label;
-    cinfo->out_color_components = 2;
-    /* Need all components on input side */
-    cinfo->comp_info[1].component_needed = TRUE;
-    cinfo->comp_info[2].component_needed = TRUE;
-    cconvert->pub.color_convert = cmyk_yk_convert;
-    build_rgb_y_table(cinfo);
-    break;
-
-  default: def_label:	/* permit null conversion to same output space */
-    if (cinfo->out_color_space != cinfo->jpeg_color_space)
-      /* unsupported non-null conversion */
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    i = 0;
-    for (ci = 0; ci < cinfo->num_components; ci++)
-      if (cinfo->comp_info[ci].component_needed)
-	i++;		/* count output color components */
-    cinfo->out_color_components = i;
-    cconvert->pub.color_convert = null_convert;
-  }
-
-  if (cinfo->quantize_colors)
-    cinfo->output_components = 1; /* single colormapped output component */
-  else
-    cinfo->output_components = cinfo->out_color_components;
-}
diff --git a/dep/libjpeg/src/jdct.h b/dep/libjpeg/src/jdct.h
deleted file mode 100644
index 0f251590c..000000000
--- a/dep/libjpeg/src/jdct.h
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * jdct.h
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file contains common declarations for the forward and
- * inverse DCT modules.  These declarations are private to the DCT managers
- * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
- * The individual DCT algorithms are kept in separate files to ease 
- * machine-dependent tuning (e.g., assembly coding).
- */
-
-
-/*
- * A forward DCT routine is given a pointer to an input sample array and
- * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
- * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
- * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
- * array of type FAST_FLOAT, instead.)
- * The input data is to be fetched from the sample array starting at a
- * specified column.  (Any row offset needed will be applied to the array
- * pointer before it is passed to the FDCT code.)
- * Note that the number of samples fetched by the FDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- * The DCT outputs are returned scaled up by a factor of 8; they therefore
- * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
- * convention improves accuracy in integer implementations and saves some
- * work in floating-point ones.
- * Quantization of the output coefficients is done by jcdctmgr.c.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef int DCTELEM;		/* 16 or 32 bits is fine */
-#else
-typedef INT32 DCTELEM;		/* must have 32 bits */
-#endif
-
-typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
-					       JSAMPARRAY sample_data,
-					       JDIMENSION start_col));
-typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
-					     JSAMPARRAY sample_data,
-					     JDIMENSION start_col));
-
-
-/*
- * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
- * to an output sample array.  The routine must dequantize the input data as
- * well as perform the IDCT; for dequantization, it uses the multiplier table
- * pointed to by compptr->dct_table.  The output data is to be placed into the
- * sample array starting at a specified column.  (Any row offset needed will
- * be applied to the array pointer before it is passed to the IDCT code.)
- * Note that the number of samples emitted by the IDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- */
-
-/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
-
-/*
- * Each IDCT routine has its own ideas about the best dct_table element type.
- */
-
-typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
-#if BITS_IN_JSAMPLE == 8
-typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
-#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
-#else
-typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
-#endif
-typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
-
-
-/*
- * Each IDCT routine is responsible for range-limiting its results and
- * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
- * be quite far out of range if the input data is corrupt, so a bulletproof
- * range-limiting step is required.  We use a mask-and-table-lookup method
- * to do the combined operations quickly, assuming that RANGE_CENTER
- * (defined in jpegint.h) is a power of 2.  See the comments with
- * prepare_range_limit_table (in jdmaster.c) for more info.
- */
-
-#define RANGE_MASK  (RANGE_CENTER * 2 - 1)
-#define RANGE_SUBSET  (RANGE_CENTER - CENTERJSAMPLE)
-
-#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit - RANGE_SUBSET)
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow		jFDislow
-#define jpeg_fdct_ifast		jFDifast
-#define jpeg_fdct_float		jFDfloat
-#define jpeg_fdct_7x7		jFD7x7
-#define jpeg_fdct_6x6		jFD6x6
-#define jpeg_fdct_5x5		jFD5x5
-#define jpeg_fdct_4x4		jFD4x4
-#define jpeg_fdct_3x3		jFD3x3
-#define jpeg_fdct_2x2		jFD2x2
-#define jpeg_fdct_1x1		jFD1x1
-#define jpeg_fdct_9x9		jFD9x9
-#define jpeg_fdct_10x10		jFD10x10
-#define jpeg_fdct_11x11		jFD11x11
-#define jpeg_fdct_12x12		jFD12x12
-#define jpeg_fdct_13x13		jFD13x13
-#define jpeg_fdct_14x14		jFD14x14
-#define jpeg_fdct_15x15		jFD15x15
-#define jpeg_fdct_16x16		jFD16x16
-#define jpeg_fdct_16x8		jFD16x8
-#define jpeg_fdct_14x7		jFD14x7
-#define jpeg_fdct_12x6		jFD12x6
-#define jpeg_fdct_10x5		jFD10x5
-#define jpeg_fdct_8x4		jFD8x4
-#define jpeg_fdct_6x3		jFD6x3
-#define jpeg_fdct_4x2		jFD4x2
-#define jpeg_fdct_2x1		jFD2x1
-#define jpeg_fdct_8x16		jFD8x16
-#define jpeg_fdct_7x14		jFD7x14
-#define jpeg_fdct_6x12		jFD6x12
-#define jpeg_fdct_5x10		jFD5x10
-#define jpeg_fdct_4x8		jFD4x8
-#define jpeg_fdct_3x6		jFD3x6
-#define jpeg_fdct_2x4		jFD2x4
-#define jpeg_fdct_1x2		jFD1x2
-#define jpeg_idct_islow		jRDislow
-#define jpeg_idct_ifast		jRDifast
-#define jpeg_idct_float		jRDfloat
-#define jpeg_idct_7x7		jRD7x7
-#define jpeg_idct_6x6		jRD6x6
-#define jpeg_idct_5x5		jRD5x5
-#define jpeg_idct_4x4		jRD4x4
-#define jpeg_idct_3x3		jRD3x3
-#define jpeg_idct_2x2		jRD2x2
-#define jpeg_idct_1x1		jRD1x1
-#define jpeg_idct_9x9		jRD9x9
-#define jpeg_idct_10x10		jRD10x10
-#define jpeg_idct_11x11		jRD11x11
-#define jpeg_idct_12x12		jRD12x12
-#define jpeg_idct_13x13		jRD13x13
-#define jpeg_idct_14x14		jRD14x14
-#define jpeg_idct_15x15		jRD15x15
-#define jpeg_idct_16x16		jRD16x16
-#define jpeg_idct_16x8		jRD16x8
-#define jpeg_idct_14x7		jRD14x7
-#define jpeg_idct_12x6		jRD12x6
-#define jpeg_idct_10x5		jRD10x5
-#define jpeg_idct_8x4		jRD8x4
-#define jpeg_idct_6x3		jRD6x3
-#define jpeg_idct_4x2		jRD4x2
-#define jpeg_idct_2x1		jRD2x1
-#define jpeg_idct_8x16		jRD8x16
-#define jpeg_idct_7x14		jRD7x14
-#define jpeg_idct_6x12		jRD6x12
-#define jpeg_idct_5x10		jRD5x10
-#define jpeg_idct_4x8		jRD4x8
-#define jpeg_idct_3x6		jRD3x6
-#define jpeg_idct_2x4		jRD2x4
-#define jpeg_idct_1x2		jRD1x2
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-/* Extern declarations for the forward and inverse DCT routines. */
-
-EXTERN(void) jpeg_fdct_islow
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_ifast
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_float
-    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_9x9
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_11x11
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_13x13
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_15x15
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-
-EXTERN(void) jpeg_idct_islow
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_ifast
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_float
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_9x9
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_11x11
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_13x13
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_15x15
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-
-
-/*
- * Macros for handling fixed-point arithmetic; these are used by many
- * but not all of the DCT/IDCT modules.
- *
- * All values are expected to be of type INT32.
- * Fractional constants are scaled left by CONST_BITS bits.
- * CONST_BITS is defined within each module using these macros,
- * and may differ from one module to the next.
- */
-
-#define ONE	((INT32) 1)
-#define CONST_SCALE (ONE << CONST_BITS)
-
-/* Convert a positive real constant to an integer scaled by CONST_SCALE.
- * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
- * thus causing a lot of useless floating-point operations at run time.
- */
-
-#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * This macro is used only when the two inputs will actually be no more than
- * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
- * full 32x32 multiply.  This provides a useful speedup on many machines.
- * Unfortunately there is no way to specify a 16x16->32 multiply portably
- * in C, but some C compilers will do the right thing if you provide the
- * correct combination of casts.
- */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
-#endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
-#endif
-
-#ifndef MULTIPLY16C16		/* default definition */
-#define MULTIPLY16C16(var,const)  ((var) * (const))
-#endif
-
-/* Same except both inputs are variables. */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
-#endif
-
-#ifndef MULTIPLY16V16		/* default definition */
-#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
-#endif
-
-/* Like RIGHT_SHIFT, but applies to a DCTELEM.
- * We assume that int right shift is unsigned if INT32 right shift is.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	DCTELEM ishift_temp;
-#if BITS_IN_JSAMPLE == 8
-#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
-#else
-#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
-#endif
-#define IRIGHT_SHIFT(x,shft)  \
-    ((ishift_temp = (x)) < 0 ? \
-     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
-     (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
diff --git a/dep/libjpeg/src/jddctmgr.c b/dep/libjpeg/src/jddctmgr.c
deleted file mode 100644
index 9ecfbb510..000000000
--- a/dep/libjpeg/src/jddctmgr.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * jddctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the inverse-DCT management logic.
- * This code selects a particular IDCT implementation to be used,
- * and it performs related housekeeping chores.  No code in this file
- * is executed per IDCT step, only during output pass setup.
- *
- * Note that the IDCT routines are responsible for performing coefficient
- * dequantization as well as the IDCT proper.  This module sets up the
- * dequantization multiplier table needed by the IDCT routine.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/*
- * The decompressor input side (jdinput.c) saves away the appropriate
- * quantization table for each component at the start of the first scan
- * involving that component.  (This is necessary in order to correctly
- * decode files that reuse Q-table slots.)
- * When we are ready to make an output pass, the saved Q-table is converted
- * to a multiplier table that will actually be used by the IDCT routine.
- * The multiplier table contents are IDCT-method-dependent.  To support
- * application changes in IDCT method between scans, we can remake the
- * multiplier tables if necessary.
- * In buffered-image mode, the first output pass may occur before any data
- * has been seen for some components, and thus before their Q-tables have
- * been saved away.  To handle this case, multiplier tables are preset
- * to zeroes; the result of the IDCT will be a neutral gray level.
- */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_inverse_dct pub;	/* public fields */
-
-  /* This array contains the IDCT method code that each multiplier table
-   * is currently set up for, or -1 if it's not yet set up.
-   * The actual multiplier tables are pointed to by dct_table in the
-   * per-component comp_info structures.
-   */
-  int cur_method[MAX_COMPONENTS];
-} my_idct_controller;
-
-typedef my_idct_controller * my_idct_ptr;
-
-
-/* Allocated multiplier tables: big enough for any supported variant */
-
-typedef union {
-  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
-#ifdef DCT_IFAST_SUPPORTED
-  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-  FLOAT_MULT_TYPE float_array[DCTSIZE2];
-#endif
-} multiplier_table;
-
-
-/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef IDCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Prepare for an output pass.
- * Here we select the proper IDCT routine for each component and build
- * a matching multiplier table.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
-  int ci, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  inverse_DCT_method_ptr method_ptr = NULL;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper IDCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef IDCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      method_ptr = jpeg_idct_1x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      method_ptr = jpeg_idct_2x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      method_ptr = jpeg_idct_3x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      method_ptr = jpeg_idct_4x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      method_ptr = jpeg_idct_5x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      method_ptr = jpeg_idct_6x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      method_ptr = jpeg_idct_7x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      method_ptr = jpeg_idct_9x9;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      method_ptr = jpeg_idct_10x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      method_ptr = jpeg_idct_11x11;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      method_ptr = jpeg_idct_12x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      method_ptr = jpeg_idct_13x13;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      method_ptr = jpeg_idct_14x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      method_ptr = jpeg_idct_15x15;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      method_ptr = jpeg_idct_16x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      method_ptr = jpeg_idct_16x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      method_ptr = jpeg_idct_14x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      method_ptr = jpeg_idct_12x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      method_ptr = jpeg_idct_10x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      method_ptr = jpeg_idct_8x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      method_ptr = jpeg_idct_6x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      method_ptr = jpeg_idct_4x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      method_ptr = jpeg_idct_2x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      method_ptr = jpeg_idct_8x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      method_ptr = jpeg_idct_7x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      method_ptr = jpeg_idct_6x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      method_ptr = jpeg_idct_5x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      method_ptr = jpeg_idct_4x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      method_ptr = jpeg_idct_3x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      method_ptr = jpeg_idct_2x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      method_ptr = jpeg_idct_1x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	method_ptr = jpeg_idct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	method_ptr = jpeg_idct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	method_ptr = jpeg_idct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-      break;
-    }
-    idct->pub.inverse_DCT[ci] = method_ptr;
-    /* Create multiplier table from quant table.
-     * However, we can skip this if the component is uninteresting
-     * or if we already built the table.  Also, if no quant table
-     * has yet been saved for the component, we leave the
-     * multiplier table all-zero; we'll be reading zeroes from the
-     * coefficient controller's buffer anyway.
-     */
-    if (! compptr->component_needed || idct->cur_method[ci] == method)
-      continue;
-    qtbl = compptr->quant_table;
-    if (qtbl == NULL)		/* happens if no data yet for component */
-      continue;
-    idct->cur_method[ci] = method;
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      {
-	/* For LL&M IDCT method, multipliers are equal to raw quantization
-	 * coefficients, but are stored as ints to ensure access efficiency.
-	 */
-	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
-	}
-      }
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * For integer operation, the multiplier table is to be scaled by
-	 * IFAST_SCALE_BITS.
-	 */
-	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ifmtbl[i] = (IFAST_MULT_TYPE)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-IFAST_SCALE_BITS);
-	}
-      }
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 1/8.
-	 */
-	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fmtbl[i] = (FLOAT_MULT_TYPE)
-	      ((double) qtbl->quantval[i] *
-	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
-	    i++;
-	  }
-	}
-      }
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-      break;
-    }
-  }
-}
-
-
-/*
- * Initialize IDCT manager.
- */
-
-GLOBAL(void)
-jinit_inverse_dct (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct;
-  int ci;
-  jpeg_component_info *compptr;
-
-  idct = (my_idct_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_idct_controller));
-  cinfo->idct = &idct->pub;
-  idct->pub.start_pass = start_pass;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate and pre-zero a multiplier table for each component */
-    compptr->dct_table =
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(multiplier_table));
-    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
-    /* Mark multiplier table not yet set up for any method */
-    idct->cur_method[ci] = -1;
-  }
-}
diff --git a/dep/libjpeg/src/jdhuff.c b/dep/libjpeg/src/jdhuff.c
deleted file mode 100644
index f175f0c32..000000000
--- a/dep/libjpeg/src/jdhuff.c
+++ /dev/null
@@ -1,1559 +0,0 @@
-/*
- * jdhuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy decoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting input suspension.
- * If the data source module demands suspension, we want to be able to back
- * up to the start of the current MCU.  To do this, we copy state variables
- * into local working storage, and update them back to the permanent
- * storage only upon successful completion of an MCU.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Derived data constructed for each Huffman table */
-
-#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
-
-typedef struct {
-  /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
-  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
-  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
-   * the smallest code of length k; so given a code of length k, the
-   * corresponding symbol is huffval[code + valoffset[k]]
-   */
-
-  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
-  JHUFF_TBL *pub;
-
-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
-   * the input data stream.  If the next Huffman code is no more
-   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
-   */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
-} d_derived_tbl;
-
-
-/*
- * Fetching the next N bits from the input stream is a time-critical operation
- * for the Huffman decoders.  We implement it with a combination of inline
- * macros and out-of-line subroutines.  Note that N (the number of bits
- * demanded at one time) never exceeds 15 for JPEG use.
- *
- * We read source bytes into get_buffer and dole out bits as needed.
- * If get_buffer already contains enough bits, they are fetched in-line
- * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
- * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
- * as full as possible (not just to the number of bits needed; this
- * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
- * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
- * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
- * at least the requested number of bits --- dummy zeroes are inserted if
- * necessary.
- */
-
-typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32	/* size of buffer in bits */
-
-/* If long is > 32 bits on your machine, and shifting/masking longs is
- * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
- * appropriately should be a win.  Unfortunately we can't define the size
- * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
- * because not all machines measure sizeof in 8-bit bytes.
- */
-
-typedef struct {		/* Bitreading state saved across MCUs */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-} bitread_perm_state;
-
-typedef struct {		/* Bitreading working state within an MCU */
-  /* Current data source location */
-  /* We need a copy, rather than munging the original, in case of suspension */
-  const JOCTET * next_input_byte; /* => next byte to read from source */
-  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
-  /* Bit input buffer --- note these values are kept in register variables,
-   * not in this struct, inside the inner loops.
-   */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-  /* Pointer needed by jpeg_fill_bit_buffer. */
-  j_decompress_ptr cinfo;	/* back link to decompress master record */
-} bitread_working_state;
-
-/* Macros to declare and load/save bitread local variables. */
-#define BITREAD_STATE_VARS  \
-	register bit_buf_type get_buffer;  \
-	register int bits_left;  \
-	bitread_working_state br_state
-
-#define BITREAD_LOAD_STATE(cinfop,permstate)  \
-	br_state.cinfo = cinfop; \
-	br_state.next_input_byte = cinfop->src->next_input_byte; \
-	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
-	get_buffer = permstate.get_buffer; \
-	bits_left = permstate.bits_left;
-
-#define BITREAD_SAVE_STATE(cinfop,permstate)  \
-	cinfop->src->next_input_byte = br_state.next_input_byte; \
-	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
-	permstate.get_buffer = get_buffer; \
-	permstate.bits_left = bits_left
-
-/*
- * These macros provide the in-line portion of bit fetching.
- * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
- * before using GET_BITS, PEEK_BITS, or DROP_BITS.
- * The variables get_buffer and bits_left are assumed to be locals,
- * but the state struct might not be (jpeg_huff_decode needs this).
- *	CHECK_BIT_BUFFER(state,n,action);
- *		Ensure there are N bits in get_buffer; if suspend, take action.
- *      val = GET_BITS(n);
- *		Fetch next N bits.
- *      val = PEEK_BITS(n);
- *		Fetch next N bits without removing them from the buffer.
- *	DROP_BITS(n);
- *		Discard next N bits.
- * The value N should be a simple variable, not an expression, because it
- * is evaluated multiple times.
- */
-
-#define CHECK_BIT_BUFFER(state,nbits,action) \
-	{ if (bits_left < (nbits)) {  \
-	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
-	      { action; }  \
-	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
-
-#define GET_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
-
-#define PEEK_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
-
-#define DROP_BITS(nbits) \
-	(bits_left -= (nbits))
-
-
-/*
- * Code for extracting next Huffman-coded symbol from input bit stream.
- * Again, this is time-critical and we make the main paths be macros.
- *
- * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
- * without looping.  Usually, more than 95% of the Huffman codes will be 8
- * or fewer bits long.  The few overlength codes are handled with a loop,
- * which need not be inline code.
- *
- * Notes about the HUFF_DECODE macro:
- * 1. Near the end of the data segment, we may fail to get enough bits
- *    for a lookahead.  In that case, we do it the hard way.
- * 2. If the lookahead table contains no entry, the next code must be
- *    more than HUFF_LOOKAHEAD bits long.
- * 3. jpeg_huff_decode returns -1 if forced to suspend.
- */
-
-#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
-{ register int nb, look; \
-  if (bits_left < HUFF_LOOKAHEAD) { \
-    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-    if (bits_left < HUFF_LOOKAHEAD) { \
-      nb = 1; goto slowlabel; \
-    } \
-  } \
-  look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
-    DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
-  } else { \
-    nb = HUFF_LOOKAHEAD+1; \
-slowlabel: \
-    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-	{ failaction; } \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-  } \
-}
-
-
-/*
- * Expanded entropy decoder object for Huffman decoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
-  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).EOBRUN = (src).EOBRUN, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  /* These fields are loaded into local variables at start of each MCU.
-   * In case of suspension, we exit WITHOUT updating them.
-   */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  boolean insufficient_data;	/* set TRUE after emitting warning */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Following two fields used only in progressive mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
-
-  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
-
-  /* Following fields used only in sequential mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Precalculated info set up by start_pass for use in decode_mcu: */
-
-  /* Pointers to derived tables to be used for each block within an MCU */
-  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  /* Whether we care about the DC and AC coefficient values for each block */
-  int coef_limit[D_MAX_BLOCKS_IN_MCU];
-} huff_entropy_decoder;
-
-typedef huff_entropy_decoder * huff_entropy_ptr;
-
-
-static const int jpeg_zigzag_order[8][8] = {
-  {  0,  1,  5,  6, 14, 15, 27, 28 },
-  {  2,  4,  7, 13, 16, 26, 29, 42 },
-  {  3,  8, 12, 17, 25, 30, 41, 43 },
-  {  9, 11, 18, 24, 31, 40, 44, 53 },
-  { 10, 19, 23, 32, 39, 45, 52, 54 },
-  { 20, 22, 33, 38, 46, 51, 55, 60 },
-  { 21, 34, 37, 47, 50, 56, 59, 61 },
-  { 35, 36, 48, 49, 57, 58, 62, 63 }
-};
-
-static const int jpeg_zigzag_order7[7][7] = {
-  {  0,  1,  5,  6, 14, 15, 27 },
-  {  2,  4,  7, 13, 16, 26, 28 },
-  {  3,  8, 12, 17, 25, 29, 38 },
-  {  9, 11, 18, 24, 30, 37, 39 },
-  { 10, 19, 23, 31, 36, 40, 45 },
-  { 20, 22, 32, 35, 41, 44, 46 },
-  { 21, 33, 34, 42, 43, 47, 48 }
-};
-
-static const int jpeg_zigzag_order6[6][6] = {
-  {  0,  1,  5,  6, 14, 15 },
-  {  2,  4,  7, 13, 16, 25 },
-  {  3,  8, 12, 17, 24, 26 },
-  {  9, 11, 18, 23, 27, 32 },
-  { 10, 19, 22, 28, 31, 33 },
-  { 20, 21, 29, 30, 34, 35 }
-};
-
-static const int jpeg_zigzag_order5[5][5] = {
-  {  0,  1,  5,  6, 14 },
-  {  2,  4,  7, 13, 15 },
-  {  3,  8, 12, 16, 21 },
-  {  9, 11, 17, 20, 22 },
-  { 10, 18, 19, 23, 24 }
-};
-
-static const int jpeg_zigzag_order4[4][4] = {
-  { 0,  1,  5,  6 },
-  { 2,  4,  7, 12 },
-  { 3,  8, 11, 13 },
-  { 9, 10, 14, 15 }
-};
-
-static const int jpeg_zigzag_order3[3][3] = {
-  { 0, 1, 5 },
-  { 2, 4, 6 },
-  { 3, 7, 8 }
-};
-
-static const int jpeg_zigzag_order2[2][2] = {
-  { 0, 1 },
-  { 2, 3 }
-};
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-			 d_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  d_derived_tbl *dtbl;
-  int p, i, l, si, numsymbols;
-  int lookbits, ctr;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (d_derived_tbl *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(d_derived_tbl));
-  dtbl = *pdtbl;
-  dtbl->pub = htbl;		/* fill in back link */
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  numsymbols = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-  
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-
-  /* Figure F.15: generate decoding tables for bit-sequential decoding */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    if (htbl->bits[l]) {
-      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
-       * minus the minimum code of length l
-       */
-      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
-      p += htbl->bits[l];
-      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
-    } else {
-      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
-    }
-  }
-  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
-
-  /* Compute lookahead tables to speed up decoding.
-   * First we set all the table entries to 0, indicating "too long";
-   * then we iterate through the Huffman codes that are short enough and
-   * fill in all the entries that correspond to bit sequences starting
-   * with that code.
-   */
-
-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
-
-  p = 0;
-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
-    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
-      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
-      /* Generate left-justified code followed by all possible bit sequences */
-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
-      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->look_nbits[lookbits] = l;
-	dtbl->look_sym[lookbits] = htbl->huffval[p];
-	lookbits++;
-      }
-    }
-  }
-
-  /* Validate symbols as being reasonable.
-   * For AC tables, we make no check, but accept all byte values 0..255.
-   * For DC tables, we require the symbols to be in range 0..15.
-   * (Tighter bounds could be applied depending on the data depth and mode,
-   * but this is sufficient to ensure safe decoding.)
-   */
-  if (isDC) {
-    for (i = 0; i < numsymbols; i++) {
-      int sym = htbl->huffval[i];
-      if (sym < 0 || sym > 15)
-	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    }
-  }
-}
-
-
-/*
- * Out-of-line code for bit fetching.
- * Note: current values of get_buffer and bits_left are passed as parameters,
- * but are returned in the corresponding fields of the state struct.
- *
- * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
- * of get_buffer to be used.  (On machines with wider words, an even larger
- * buffer could be used.)  However, on some machines 32-bit shifts are
- * quite slow and take time proportional to the number of places shifted.
- * (This is true with most PC compilers, for instance.)  In this case it may
- * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
- * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
- */
-
-#ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15	/* minimum allowable value */
-#else
-#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
-#endif
-
-
-LOCAL(boolean)
-jpeg_fill_bit_buffer (bitread_working_state * state,
-		      register bit_buf_type get_buffer, register int bits_left,
-		      int nbits)
-/* Load up the bit buffer to a depth of at least nbits */
-{
-  /* Copy heavily used state fields into locals (hopefully registers) */
-  register const JOCTET * next_input_byte = state->next_input_byte;
-  register size_t bytes_in_buffer = state->bytes_in_buffer;
-  j_decompress_ptr cinfo = state->cinfo;
-
-  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
-  /* (It is assumed that no request will be for more than that many bits.) */
-  /* We fail to do so only if we hit a marker or are forced to suspend. */
-
-  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
-    while (bits_left < MIN_GET_BITS) {
-      register int c;
-
-      /* Attempt to read a byte */
-      if (bytes_in_buffer == 0) {
-	if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	  return FALSE;
-	next_input_byte = cinfo->src->next_input_byte;
-	bytes_in_buffer = cinfo->src->bytes_in_buffer;
-      }
-      bytes_in_buffer--;
-      c = GETJOCTET(*next_input_byte++);
-
-      /* If it's 0xFF, check and discard stuffed zero byte */
-      if (c == 0xFF) {
-	/* Loop here to discard any padding FF's on terminating marker,
-	 * so that we can save a valid unread_marker value.  NOTE: we will
-	 * accept multiple FF's followed by a 0 as meaning a single FF data
-	 * byte.  This data pattern is not valid according to the standard.
-	 */
-	do {
-	  if (bytes_in_buffer == 0) {
-	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	      return FALSE;
-	    next_input_byte = cinfo->src->next_input_byte;
-	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
-	  }
-	  bytes_in_buffer--;
-	  c = GETJOCTET(*next_input_byte++);
-	} while (c == 0xFF);
-
-	if (c == 0) {
-	  /* Found FF/00, which represents an FF data byte */
-	  c = 0xFF;
-	} else {
-	  /* Oops, it's actually a marker indicating end of compressed data.
-	   * Save the marker code for later use.
-	   * Fine point: it might appear that we should save the marker into
-	   * bitread working state, not straight into permanent state.  But
-	   * once we have hit a marker, we cannot need to suspend within the
-	   * current MCU, because we will read no more bytes from the data
-	   * source.  So it is OK to update permanent state right away.
-	   */
-	  cinfo->unread_marker = c;
-	  /* See if we need to insert some fake zero bits. */
-	  goto no_more_bytes;
-	}
-      }
-
-      /* OK, load c into get_buffer */
-      get_buffer = (get_buffer << 8) | c;
-      bits_left += 8;
-    } /* end while */
-  } else {
-  no_more_bytes:
-    /* We get here if we've read the marker that terminates the compressed
-     * data segment.  There should be enough bits in the buffer register
-     * to satisfy the request; if so, no problem.
-     */
-    if (nbits > bits_left) {
-      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
-       * the data stream, so that we can produce some kind of image.
-       * We use a nonvolatile flag to ensure that only one warning message
-       * appears per data segment.
-       */
-      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
-	WARNMS(cinfo, JWRN_HIT_MARKER);
-	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
-      }
-      /* Fill the buffer with zero bits */
-      get_buffer <<= MIN_GET_BITS - bits_left;
-      bits_left = MIN_GET_BITS;
-    }
-  }
-
-  /* Unload the local registers */
-  state->next_input_byte = next_input_byte;
-  state->bytes_in_buffer = bytes_in_buffer;
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  return TRUE;
-}
-
-
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and sub will be faster than a table lookup.
- */
-
-#ifdef AVOID_TABLES
-
-#define BIT_MASK(nbits)   ((1<<(nbits))-1)
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
-
-#else
-
-#define BIT_MASK(nbits)   bmask[nbits]
-#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
-
-static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
-  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
-    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
-
-#endif /* AVOID_TABLES */
-
-
-/*
- * Out-of-line code for Huffman code decoding.
- */
-
-LOCAL(int)
-jpeg_huff_decode (bitread_working_state * state,
-		  register bit_buf_type get_buffer, register int bits_left,
-		  d_derived_tbl * htbl, int min_bits)
-{
-  register int l = min_bits;
-  register INT32 code;
-
-  /* HUFF_DECODE has determined that the code is at least min_bits */
-  /* bits long, so fetch that many bits in one swoop. */
-
-  CHECK_BIT_BUFFER(*state, l, return -1);
-  code = GET_BITS(l);
-
-  /* Collect the rest of the Huffman code one bit at a time. */
-  /* This is per Figure F.16 in the JPEG spec. */
-
-  while (code > htbl->maxcode[l]) {
-    code <<= 1;
-    CHECK_BIT_BUFFER(*state, 1, return -1);
-    code |= GET_BITS(1);
-    l++;
-  }
-
-  /* Unload the local registers */
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  /* With garbage input we may reach the sentinel value l = 17. */
-
-  if (l > 16) {
-    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;			/* fake a zero as the safest result */
-  }
-
-  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
-}
-
-
-/*
- * Finish up at the end of a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass_huff (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-
-  /* Throw away any unused bits remaining in bit buffer; */
-  /* include any full bytes in next_marker's count of discarded bytes */
-  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
-  entropy->bitstate.bits_left = 0;
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- * Returns FALSE if must suspend.
- */
-
-LOCAL(boolean)
-process_restart (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci;
-
-  finish_pass_huff(cinfo);
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    return FALSE;
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-    entropy->saved.last_dc_val[ci] = 0;
-  /* Re-init EOB run count, too */
-  entropy->saved.EOBRUN = 0;
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-
-  /* Reset out-of-data flag, unless read_restart_marker left us smack up
-   * against a marker.  In that case we will end up treating the next data
-   * segment as empty, and we can avoid producing bogus output pixels by
-   * leaving the flag set.
-   */
-  if (cinfo->unread_marker == 0)
-    entropy->insufficient_data = FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Huffman MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * Huffman-compressed coefficients. 
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- * (Wholesale zeroing is usually a little faster than retail...)
- *
- * We return FALSE if data source requested suspension.  In that case no
- * changes have been made to permanent state.  (Exception: some output
- * coefficients may already have been assigned.  This is harmless for
- * spectral selection, since we'll just re-assign them on the next call.
- * Successive approximation AC refinement has to be more careful, however.)
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int Al = cinfo->Al;
-  register int s, r;
-  int blkn, ci;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  savable_state state;
-  d_derived_tbl * tbl;
-  jpeg_component_info * compptr;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      block = MCU_data[blkn];
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
-      if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
-      }
-
-      /* Convert DC difference to actual value, update last_dc_val */
-      s += state.last_dc_val[ci];
-      state.last_dc_val[ci] = s;
-      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
-      (*block)[0] = (JCOEF) (s << Al);
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state.
-     * We can avoid loading/saving bitread state if in an EOB run.
-     */
-    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-
-    if (EOBRUN)			/* if it's a band of zeroes... */
-      EOBRUN--;			/* ...process it now (we do nothing) */
-    else {
-      BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-      Se = cinfo->Se;
-      Al = cinfo->Al;
-      natural_order = cinfo->natural_order;
-      block = MCU_data[0];
-      tbl = entropy->ac_derived_tbl;
-
-      for (k = cinfo->Ss; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	  /* Scale and output coefficient in natural (dezigzagged) order */
-	  (*block)[natural_order[k]] = (JCOEF) (s << Al);
-	} else {
-	  if (r != 15) {	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {		/* EOBr, r > 0 */
-	      EOBRUN = 1 << r;
-	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	      EOBRUN--;		/* this band is processed at this moment */
-	    }
-	    break;		/* force end-of-band */
-	  }
-	  k += 15;		/* ZRL: skip 15 zeroes in band */
-	}
-      }
-
-      BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    }
-
-    /* Completed MCU, so update state */
-    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  JCOEF p1;
-  int blkn;
-  BITREAD_STATE_VARS;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* Not worth the cycles to check insufficient_data here,
-   * since we will not change the data anyway if we read zeroes.
-   */
-
-  /* Load up working state */
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
-    if (GET_BITS(1))
-      MCU_data[blkn][0][0] |= p1;
-    /* Note: since we use |=, repeating the assignment later is safe */
-  }
-
-  /* Completed MCU, so update state */
-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se;
-  JCOEF p1, m1;
-  const int * natural_order;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-  int num_newnz;
-  int newnz_pos[DCTSIZE2];
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, don't modify the MCU.
-   */
-  if (! entropy->insufficient_data) {
-
-    Se = cinfo->Se;
-    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-    m1 = -p1;			/* -1 in the bit position being coded */
-    natural_order = cinfo->natural_order;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-    block = MCU_data[0];
-    tbl = entropy->ac_derived_tbl;
-
-    /* If we are forced to suspend, we must undo the assignments to any newly
-     * nonzero coefficients in the block, because otherwise we'd get confused
-     * next time about which coefficients were already nonzero.
-     * But we need not undo addition of bits to already-nonzero coefficients;
-     * instead, we can test the current bit to see if we already did it.
-     */
-    num_newnz = 0;
-
-    /* initialize coefficient loop counter to start of band */
-    k = cinfo->Ss;
-
-    if (EOBRUN == 0) {
-      do {
-	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  if (s != 1)		/* size of new coef should always be 1 */
-	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1))
-	    s = p1;		/* newly nonzero coef is positive */
-	  else
-	    s = m1;		/* newly nonzero coef is negative */
-	} else {
-	  if (r != 15) {
-	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {
-	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    break;		/* rest of block is handled by EOB logic */
-	  }
-	  /* note s = 0 for processing ZRL */
-	}
-	/* Advance over already-nonzero coefs and r still-zero coefs,
-	 * appending correction bits to the nonzeroes.  A correction bit is 1
-	 * if the absolute value of the coefficient must be increased.
-	 */
-	do {
-	  thiscoef = *block + natural_order[k];
-	  if (*thiscoef) {
-	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	    if (GET_BITS(1)) {
-	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-		if (*thiscoef >= 0)
-		  *thiscoef += p1;
-		else
-		  *thiscoef += m1;
-	      }
-	    }
-	  } else {
-	    if (--r < 0)
-	      break;		/* reached target zero coefficient */
-	  }
-	  k++;
-	} while (k <= Se);
-	if (s) {
-	  int pos = natural_order[k];
-	  /* Output newly nonzero coefficient */
-	  (*block)[pos] = (JCOEF) s;
-	  /* Remember its position in case we have to suspend */
-	  newnz_pos[num_newnz++] = pos;
-	}
-	k++;
-      } while (k <= Se);
-    }
-
-    if (EOBRUN) {
-      /* Scan any remaining coefficient positions after the end-of-band
-       * (the last newly nonzero coefficient, if any).  Append a correction
-       * bit to each already-nonzero coefficient.  A correction bit is 1
-       * if the absolute value of the coefficient must be increased.
-       */
-      do {
-	thiscoef = *block + natural_order[k];
-	if (*thiscoef) {
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1)) {
-	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-	      if (*thiscoef >= 0)
-		*thiscoef += p1;
-	      else
-		*thiscoef += m1;
-	    }
-	  }
-	}
-	k++;
-      } while (k <= Se);
-      /* Count one block completed in EOB run */
-      EOBRUN--;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-
-undoit:
-  /* Re-zero any output coefficients that we made newly nonzero */
-  while (num_newnz)
-    (*block)[newnz_pos[--num_newnz]] = 0;
-
-  return FALSE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * partial blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  int Se, blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    natural_order = cinfo->natural_order;
-    Se = cinfo->lim_Se;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in natural_order[] will save us
-	     * if k > Se, which could happen if the data is corrupted.
-	     */
-	    (*block)[natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * full-size blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in jpeg_natural_order[] will save us
-	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
-	     */
-	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k < DCTSIZE2; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-start_pass_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, blkn, tbl, i;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      /* Arguably the maximum Al value should be less than 13 for 8-bit
-       * precision, but the spec doesn't say so, and we try to be liberal
-       * about what we accept.  Note: large Al values could result in
-       * out-of-range DC coefficients during early scans, leading to bizarre
-       * displays due to overflows in the IDCT math.  But we won't crash.
-       */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Make sure requested tables are present, and compute derived tables.
-       * We may build same derived table more than once, but it's not expensive.
-       */
-      if (cinfo->Ss == 0) {
-	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
-	  tbl = compptr->dc_tbl_no;
-	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-				  & entropy->derived_tbls[tbl]);
-	}
-      } else {
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->derived_tbls[tbl]);
-	/* remember the single active table */
-	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Initialize private state variables */
-    entropy->saved.EOBRUN = 0;
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning because
-     * there are some baseline files out there with all zeroes in these bytes.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
-	cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-
-    /* Select MCU decoding routine */
-    /* We retain the hard-coded case for full-size blocks.
-     * This is not necessary, but it appears that this version is slightly
-     * more performant in the given implementation.
-     * With an improved implementation we would prefer a single optimized
-     * function.
-     */
-    if (cinfo->lim_Se != DCTSIZE2-1)
-      entropy->pub.decode_mcu = decode_mcu_sub;
-    else
-      entropy->pub.decode_mcu = decode_mcu;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Compute derived values for Huffman tables */
-      /* We may do this more than once for a table, but it's not expensive */
-      tbl = compptr->dc_tbl_no;
-      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-			      & entropy->dc_derived_tbls[tbl]);
-      if (cinfo->lim_Se) {	/* AC needs no table when not present */
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Precalculate decoding info for each block in an MCU of this scan */
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      /* Precalculate which table to use for each block */
-      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
-      entropy->ac_cur_tbls[blkn] =	/* AC needs no table when not present */
-	cinfo->lim_Se ? entropy->ac_derived_tbls[compptr->ac_tbl_no] : NULL;
-      /* Decide whether we really care about the coefficient values */
-      if (compptr->component_needed) {
-	ci = compptr->DCT_v_scaled_size;
-	i = compptr->DCT_h_scaled_size;
-	switch (cinfo->lim_Se) {
-	case (1*1-1):
-	  entropy->coef_limit[blkn] = 1;
-	  break;
-	case (2*2-1):
-	  if (ci <= 0 || ci > 2) ci = 2;
-	  if (i <= 0 || i > 2) i = 2;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
-	  break;
-	case (3*3-1):
-	  if (ci <= 0 || ci > 3) ci = 3;
-	  if (i <= 0 || i > 3) i = 3;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
-	  break;
-	case (4*4-1):
-	  if (ci <= 0 || ci > 4) ci = 4;
-	  if (i <= 0 || i > 4) i = 4;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
-	  break;
-	case (5*5-1):
-	  if (ci <= 0 || ci > 5) ci = 5;
-	  if (i <= 0 || i > 5) i = 5;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
-	  break;
-	case (6*6-1):
-	  if (ci <= 0 || ci > 6) ci = 6;
-	  if (i <= 0 || i > 6) i = 6;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
-	  break;
-	case (7*7-1):
-	  if (ci <= 0 || ci > 7) ci = 7;
-	  if (i <= 0 || i > 7) i = 7;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
-	  break;
-	default:
-	  if (ci <= 0 || ci > 8) ci = 8;
-	  if (i <= 0 || i > 8) i = 8;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
-	}
-      } else {
-	entropy->coef_limit[blkn] = 0;
-      }
-    }
-  }
-
-  /* Initialize bitread state variables */
-  entropy->bitstate.bits_left = 0;
-  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
-  entropy->insufficient_data = FALSE;
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy decoding.
- */
-
-GLOBAL(void)
-jinit_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_decoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass_huff_decoder;
-  entropy->pub.finish_pass = finish_pass_huff;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++)
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-
-    /* Mark derived tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->derived_tbls[i] = NULL;
-    }
-  } else {
-    /* Mark derived tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    }
-  }
-}
diff --git a/dep/libjpeg/src/jdinput.c b/dep/libjpeg/src/jdinput.c
deleted file mode 100644
index 29fbef90b..000000000
--- a/dep/libjpeg/src/jdinput.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * jdinput.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input control logic for the JPEG decompressor.
- * These routines are concerned with controlling the decompressor's input
- * processing (marker reading and coefficient decoding).  The actual input
- * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_input_controller pub; /* public fields */
-
-  int inheaders;		/* Nonzero until first SOS is reached */
-} my_input_controller;
-
-typedef my_input_controller * my_inputctl_ptr;
-
-
-/* Forward declarations */
-METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Routines to calculate various quantities related to the size of the image.
- */
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_core_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for transcoding and full decompression.
- */
-{
-#ifdef IDCT_SCALING_SUPPORTED
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
-    /* Provide 1/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
-    /* Provide 2/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
-    /* Provide 3/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
-    /* Provide 4/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
-    /* Provide 5/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
-    /* Provide 6/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
-    /* Provide 7/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
-    /* Provide 8/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
-    /* Provide 9/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
-    /* Provide 10/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
-    /* Provide 11/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
-    /* Provide 12/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
-    /* Provide 13/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
-    /* Provide 14/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
-    /* Provide 15/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide 16/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-  /* Recompute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* initial_setup has already initialized DCT_scaled_size,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
-
-#endif /* IDCT_SCALING_SUPPORTED */
-}
-
-
-LOCAL(void)
-initial_setup (j_decompress_ptr cinfo)
-/* Called once, when first SOS marker is reached */
-{
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
-  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Derive block_size, natural_order, and lim_Se */
-  if (cinfo->is_baseline || (cinfo->progressive_mode &&
-      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
-    cinfo->block_size = DCTSIZE;
-    cinfo->natural_order = jpeg_natural_order;
-    cinfo->lim_Se = DCTSIZE2-1;
-  } else
-    switch (cinfo->Se) {
-    case (1*1-1):
-      cinfo->block_size = 1;
-      cinfo->natural_order = jpeg_natural_order; /* not needed */
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (2*2-1):
-      cinfo->block_size = 2;
-      cinfo->natural_order = jpeg_natural_order2;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (3*3-1):
-      cinfo->block_size = 3;
-      cinfo->natural_order = jpeg_natural_order3;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (4*4-1):
-      cinfo->block_size = 4;
-      cinfo->natural_order = jpeg_natural_order4;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (5*5-1):
-      cinfo->block_size = 5;
-      cinfo->natural_order = jpeg_natural_order5;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (6*6-1):
-      cinfo->block_size = 6;
-      cinfo->natural_order = jpeg_natural_order6;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (7*7-1):
-      cinfo->block_size = 7;
-      cinfo->natural_order = jpeg_natural_order7;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (8*8-1):
-      cinfo->block_size = 8;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (9*9-1):
-      cinfo->block_size = 9;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (10*10-1):
-      cinfo->block_size = 10;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (11*11-1):
-      cinfo->block_size = 11;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (12*12-1):
-      cinfo->block_size = 12;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (13*13-1):
-      cinfo->block_size = 13;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (14*14-1):
-      cinfo->block_size = 14;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (15*15-1):
-      cinfo->block_size = 15;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (16*16-1):
-      cinfo->block_size = 16;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    default:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-
-  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
-   * In the full decompressor,
-   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
-   * but in the transcoder,
-   * jpeg_calc_output_dimensions is not used, so we must do it here.
-   */
-  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
-  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->block_size;
-    compptr->DCT_v_scaled_size = cinfo->block_size;
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* downsampled_width and downsampled_height will also be overridden by
-     * jdmaster.c if we are doing full decompression.  The transcoder library
-     * doesn't use these values, but the calling application might.
-     */
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
-    /* Mark component needed, until color conversion says otherwise */
-    compptr->component_needed = TRUE;
-    /* Mark no quantization table yet saved for component */
-    compptr->quant_table = NULL;
-  }
-
-  /* Compute number of fully interleaved MCU rows. */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->image_height,
-	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-
-  /* Decide whether file contains multiple scans */
-  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
-    cinfo->inputctl->has_multiple_scans = TRUE;
-  else
-    cinfo->inputctl->has_multiple_scans = FALSE;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_decompress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-
-  if (cinfo->comps_in_scan == 1) {
-
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-
-  } else {
-
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
-
-    cinfo->blocks_in_MCU = 0;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-
-  }
-}
-
-
-/*
- * Save away a copy of the Q-table referenced by each component present
- * in the current scan, unless already saved during a prior scan.
- *
- * In a multiple-scan JPEG file, the encoder could assign different components
- * the same Q-table slot number, but change table definitions between scans
- * so that each component uses a different Q-table.  (The IJG encoder is not
- * currently capable of doing this, but other encoders might.)  Since we want
- * to be able to dequantize all the components at the end of the file, this
- * means that we have to save away the table actually used for each component.
- * We do this by copying the table at the start of the first scan containing
- * the component.
- * The JPEG spec prohibits the encoder from changing the contents of a Q-table
- * slot between scans of a component using that slot.  If the encoder does so
- * anyway, this decoder will simply use the Q-table values that were current
- * at the start of the first scan for the component.
- *
- * The decompressor output side looks only at the saved quant tables,
- * not at the current Q-table slots.
- */
-
-LOCAL(void)
-latch_quant_tables (j_decompress_ptr cinfo)
-{
-  int ci, qtblno;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* No work if we already saved Q-table for this component */
-    if (compptr->quant_table != NULL)
-      continue;
-    /* Make sure specified quantization table is present */
-    qtblno = compptr->quant_tbl_no;
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    /* OK, save away the quantization table */
-    qtbl = (JQUANT_TBL *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
-    compptr->quant_table = qtbl;
-  }
-}
-
-
-/*
- * Initialize the input modules to read a scan of compressed data.
- * The first call to this is done by jdmaster.c after initializing
- * the entire decompressor (during jpeg_start_decompress).
- * Subsequent calls come from consume_markers, below.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  per_scan_setup(cinfo);
-  latch_quant_tables(cinfo);
-  (*cinfo->entropy->start_pass) (cinfo);
-  (*cinfo->coef->start_input_pass) (cinfo);
-  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
-}
-
-
-/*
- * Finish up after inputting a compressed-data scan.
- * This is called by the coefficient controller after it's read all
- * the expected data of the scan.
- */
-
-METHODDEF(void)
-finish_input_pass (j_decompress_ptr cinfo)
-{
-  (*cinfo->entropy->finish_pass) (cinfo);
-  cinfo->inputctl->consume_input = consume_markers;
-}
-
-
-/*
- * Read JPEG markers before, between, or after compressed-data scans.
- * Change state as necessary when a new scan is reached.
- * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * The consume_input method pointer points either here or to the
- * coefficient controller's consume_data routine, depending on whether
- * we are reading a compressed data segment or inter-segment markers.
- *
- * Note: This function should NOT return a pseudo SOS marker (with zero
- * component number) to the caller.  A pseudo marker received by
- * read_markers is processed and then skipped for other markers.
- */
-
-METHODDEF(int)
-consume_markers (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-  int val;
-
-  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
-    return JPEG_REACHED_EOI;
-
-  for (;;) {			/* Loop to pass pseudo SOS marker */
-    val = (*cinfo->marker->read_markers) (cinfo);
-
-    switch (val) {
-    case JPEG_REACHED_SOS:	/* Found SOS */
-      if (inputctl->inheaders) { /* 1st SOS */
-	if (inputctl->inheaders == 1)
-	  initial_setup(cinfo);
-	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
-	  inputctl->inheaders = 2;
-	  break;
-	}
-	inputctl->inheaders = 0;
-	/* Note: start_input_pass must be called by jdmaster.c
-	 * before any more input can be consumed.  jdapimin.c is
-	 * responsible for enforcing this sequencing.
-	 */
-      } else {			/* 2nd or later SOS marker */
-	if (! inputctl->pub.has_multiple_scans)
-	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
-	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
-	  break;
-	start_input_pass(cinfo);
-      }
-      return val;
-    case JPEG_REACHED_EOI:	/* Found EOI */
-      inputctl->pub.eoi_reached = TRUE;
-      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
-	if (cinfo->marker->saw_SOF)
-	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
-      } else {
-	/* Prevent infinite loop in coef ctlr's decompress_data routine
-	 * if user set output_scan_number larger than number of scans.
-	 */
-	if (cinfo->output_scan_number > cinfo->input_scan_number)
-	  cinfo->output_scan_number = cinfo->input_scan_number;
-      }
-      return val;
-    case JPEG_SUSPENDED:
-      return val;
-    default:
-      return val;
-    }
-  }
-}
-
-
-/*
- * Reset state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-  /* Reset other modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->marker->reset_marker_reader) (cinfo);
-  /* Reset progression state -- would be cleaner if entropy decoder did this */
-  cinfo->coef_bits = NULL;
-}
-
-
-/*
- * Initialize the input controller module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl;
-
-  /* Create subobject in permanent pool */
-  inputctl = (my_inputctl_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_input_controller));
-  cinfo->inputctl = &inputctl->pub;
-  /* Initialize method pointers */
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.reset_input_controller = reset_input_controller;
-  inputctl->pub.start_input_pass = start_input_pass;
-  inputctl->pub.finish_input_pass = finish_input_pass;
-  /* Initialize state: can't use reset_input_controller since we don't
-   * want to try to reset other modules yet.
-   */
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-}
diff --git a/dep/libjpeg/src/jdmainct.c b/dep/libjpeg/src/jdmainct.c
deleted file mode 100644
index 1cd66d853..000000000
--- a/dep/libjpeg/src/jdmainct.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * jdmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for decompression.
- * The main buffer lies between the JPEG decompressor proper and the
- * post-processor; it holds downsampled data in the JPEG colorspace.
- *
- * Note that this code is bypassed in raw-data mode, since the application
- * supplies the equivalent of the main buffer in that case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * In the current system design, the main buffer need never be a full-image
- * buffer; any full-height buffers will be found inside the coefficient or
- * postprocessing controllers.  Nonetheless, the main controller is not
- * trivial.  Its responsibility is to provide context rows for upsampling/
- * rescaling, and doing this in an efficient fashion is a bit tricky.
- *
- * Postprocessor input data is counted in "row groups".  A row group is
- * defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
- * sample rows of each component.  (We require DCT_scaled_size values to be
- * chosen such that these numbers are integers.  In practice DCT_scaled_size
- * values will likely be powers of two, so we actually have the stronger
- * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
- * Upsampling will typically produce max_v_samp_factor pixel rows from each
- * row group (times any additional scale factor that the upsampler is
- * applying).
- *
- * The coefficient controller will deliver data to us one iMCU row at a time;
- * each iMCU row contains v_samp_factor * DCT_v_scaled_size sample rows, or
- * exactly min_DCT_v_scaled_size row groups.  (This amount of data corresponds
- * to one row of MCUs when the image is fully interleaved.)  Note that the
- * number of sample rows varies across components, but the number of row
- * groups does not.  Some garbage sample rows may be included in the last iMCU
- * row at the bottom of the image.
- *
- * Depending on the vertical scaling algorithm used, the upsampler may need
- * access to the sample row(s) above and below its current input row group.
- * The upsampler is required to set need_context_rows TRUE at global selection
- * time if so.  When need_context_rows is FALSE, this controller can simply
- * obtain one iMCU row at a time from the coefficient controller and dole it
- * out as row groups to the postprocessor.
- *
- * When need_context_rows is TRUE, this controller guarantees that the buffer
- * passed to postprocessing contains at least one row group's worth of samples
- * above and below the row group(s) being processed.  Note that the context
- * rows "above" the first passed row group appear at negative row offsets in
- * the passed buffer.  At the top and bottom of the image, the required
- * context rows are manufactured by duplicating the first or last real sample
- * row; this avoids having special cases in the upsampling inner loops.
- *
- * The amount of context is fixed at one row group just because that's a
- * convenient number for this controller to work with.  The existing
- * upsamplers really only need one sample row of context.  An upsampler
- * supporting arbitrary output rescaling might wish for more than one row
- * group of context when shrinking the image; tough, we don't handle that.
- * (This is justified by the assumption that downsizing will be handled mostly
- * by adjusting the DCT_scaled_size values, so that the actual scale factor at
- * the upsample step needn't be much less than one.)
- *
- * To provide the desired context, we have to retain the last two row groups
- * of one iMCU row while reading in the next iMCU row.  (The last row group
- * can't be processed until we have another row group for its below-context,
- * and so we have to save the next-to-last group too for its above-context.)
- * We could do this most simply by copying data around in our buffer, but
- * that'd be very slow.  We can avoid copying any data by creating a rather
- * strange pointer structure.  Here's how it works.  We allocate a workspace
- * consisting of M+2 row groups (where M = min_DCT_v_scaled_size is the number
- * of row groups per iMCU row).  We create two sets of redundant pointers to
- * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
- * pointer lists look like this:
- *                   M+1                          M-1
- * master pointer --> 0         master pointer --> 0
- *                    1                            1
- *                   ...                          ...
- *                   M-3                          M-3
- *                   M-2                           M
- *                   M-1                          M+1
- *                    M                           M-2
- *                   M+1                          M-1
- *                    0                            0
- * We read alternate iMCU rows using each master pointer; thus the last two
- * row groups of the previous iMCU row remain un-overwritten in the workspace.
- * The pointer lists are set up so that the required context rows appear to
- * be adjacent to the proper places when we pass the pointer lists to the
- * upsampler.
- *
- * The above pictures describe the normal state of the pointer lists.
- * At top and bottom of the image, we diddle the pointer lists to duplicate
- * the first or last sample row as necessary (this is cheaper than copying
- * sample rows around).
- *
- * This scheme breaks down if M < 2, ie, min_DCT_v_scaled_size is 1.  In that
- * situation each iMCU row provides only one row group so the buffering logic
- * must be different (eg, we must read two iMCU rows before we can emit the
- * first row group).  For now, we simply do not support providing context
- * rows when min_DCT_v_scaled_size is 1.  That combination seems unlikely to
- * be worth providing --- if someone wants a 1/8th-size preview, they probably
- * want it quick and dirty, so a context-free upsampler is sufficient.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_main_controller pub; /* public fields */
-
-  /* Pointer to allocated workspace (M or M+2 row groups). */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
-  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
-
-  /* Remaining fields are only used in the context case. */
-
-  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
-
-  /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
-
-  int whichptr;			/* indicates which pointer set is now in use */
-  int context_state;		/* process_data state machine status */
-  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-/* context_state values: */
-#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
-#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
-#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-METHODDEF(void) process_data_context_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) process_data_crank_post
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#endif
-
-
-LOCAL(void)
-alloc_funny_pointers (j_decompress_ptr cinfo)
-/* Allocate space for the funny pointer lists.
- * This is done only once, not once per pass.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  /* Get top-level space for component array pointers.
-   * We alloc both arrays with one call to save a few cycles.
-   */
-  mainp->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
-  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    /* Get space for pointer lists --- M+4 row groups in each list.
-     * We alloc both pointer lists with one call to save a few cycles.
-     */
-    xbuf = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
-      JPOOL_IMAGE, 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
-    xbuf += rgroup;		/* want one row group at negative offsets */
-    mainp->xbuffer[0][ci] = xbuf;
-    xbuf += rgroup * (M + 4);
-    mainp->xbuffer[1][ci] = xbuf;
-  }
-}
-
-
-LOCAL(void)
-make_funny_pointers (j_decompress_ptr cinfo)
-/* Create the funny pointer lists discussed in the comments above.
- * The actual workspace is already allocated (in mainp->buffer),
- * and the space for the pointer lists is allocated too.
- * This routine just fills in the curiously ordered lists.
- * This will be repeated at the beginning of each pass.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY buf, xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = mainp->xbuffer[0][ci];
-    xbuf1 = mainp->xbuffer[1][ci];
-    /* First copy the workspace pointers as-is */
-    buf = mainp->buffer[ci];
-    for (i = 0; i < rgroup * (M + 2); i++) {
-      xbuf0[i] = xbuf1[i] = buf[i];
-    }
-    /* In the second list, put the last four row groups in swapped order */
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
-      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
-    }
-    /* The wraparound pointers at top and bottom will be filled later
-     * (see set_wraparound_pointers, below).  Initially we want the "above"
-     * pointers to duplicate the first actual data line.  This only needs
-     * to happen in xbuffer[0].
-     */
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[0];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_wraparound_pointers (j_decompress_ptr cinfo)
-/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
- * This changes the pointer list state from top-of-image to the normal state.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = mainp->xbuffer[0][ci];
-    xbuf1 = mainp->xbuffer[1][ci];
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
-      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
-      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
-      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_bottom_pointers (j_decompress_ptr cinfo)
-/* Change the pointer lists to duplicate the last sample row at the bottom
- * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
- * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup, iMCUheight, rows_left;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    /* Count sample rows in one iMCU row and in one row group */
-    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
-    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
-    /* Count nondummy sample rows remaining for this component */
-    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
-    if (rows_left == 0) rows_left = iMCUheight;
-    /* Count nondummy row groups.  Should get same answer for each component,
-     * so we need only do it once.
-     */
-    if (ci == 0) {
-      mainp->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
-    }
-    /* Duplicate the last real sample row rgroup*2 times; this pads out the
-     * last partial rowgroup and ensures at least one full rowgroup of context.
-     */
-    xbuf = mainp->xbuffer[mainp->whichptr][ci];
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf[rows_left + i] = xbuf[rows_left-1];
-    }
-  }
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->upsample->need_context_rows) {
-      mainp->pub.process_data = process_data_context_main;
-      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      mainp->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
-      mainp->context_state = CTX_PREPARE_FOR_IMCU;
-      mainp->iMCU_row_ctr = 0;
-      mainp->buffer_full = FALSE; /* Mark buffer empty */
-    } else {
-      /* Simple case with no context needed */
-      mainp->pub.process_data = process_data_simple_main;
-      mainp->rowgroup_ctr = mainp->rowgroups_avail; /* Mark buffer empty */
-    }
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_CRANK_DEST:
-    /* For last pass of 2-pass quantization, just crank the postprocessor */
-    mainp->pub.process_data = process_data_crank_post;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-  }
-}
-
-
-/*
- * Process some data.
- * This handles the simple case where no context is required.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			  JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (mainp->rowgroup_ctr >= mainp->rowgroups_avail) {
-    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
-      return;			/* suspension forced, can do nothing more */
-    mainp->rowgroup_ctr = 0;	/* OK, we have an iMCU row to work with */
-  }
-
-  /* Note: at the bottom of the image, we may pass extra garbage row groups
-   * to the postprocessor.  The postprocessor has to check for bottom
-   * of image anyway (at row resolution), so no point in us doing it too.
-   */
-
-  /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-}
-
-
-/*
- * Process some data.
- * This handles the case where context rows must be provided.
- */
-
-METHODDEF(void)
-process_data_context_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (! mainp->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo,
-					   mainp->xbuffer[mainp->whichptr]))
-      return;			/* suspension forced, can do nothing more */
-    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    mainp->iMCU_row_ctr++;	/* count rows received */
-  }
-
-  /* Postprocessor typically will not swallow all the input data it is handed
-   * in one call (due to filling the output buffer first).  Must be prepared
-   * to exit and restart.  This switch lets us keep track of how far we got.
-   * Note that each case falls through to the next on successful completion.
-   */
-  switch (mainp->context_state) {
-  case CTX_POSTPONED_ROW:
-    /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
-      return;			/* Need to suspend */
-    mainp->context_state = CTX_PREPARE_FOR_IMCU;
-    if (*out_row_ctr >= out_rows_avail)
-      return;			/* Postprocessor exactly filled output buf */
-    /*FALLTHROUGH*/
-  case CTX_PREPARE_FOR_IMCU:
-    /* Prepare to process first M-1 row groups of this iMCU row */
-    mainp->rowgroup_ctr = 0;
-    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
-    /* Check for bottom of image: if so, tweak pointers to "duplicate"
-     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
-     */
-    if (mainp->iMCU_row_ctr == cinfo->total_iMCU_rows)
-      set_bottom_pointers(cinfo);
-    mainp->context_state = CTX_PROCESS_IMCU;
-    /*FALLTHROUGH*/
-  case CTX_PROCESS_IMCU:
-    /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
-      return;			/* Need to suspend */
-    /* After the first iMCU, change wraparound pointers to normal state */
-    if (mainp->iMCU_row_ctr == 1)
-      set_wraparound_pointers(cinfo);
-    /* Prepare to load new iMCU row using other xbuffer list */
-    mainp->whichptr ^= 1;	/* 0=>1 or 1=>0 */
-    mainp->buffer_full = FALSE;
-    /* Still need to process last row group of this iMCU row, */
-    /* which is saved at index M+1 of the other xbuffer */
-    mainp->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
-    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
-    mainp->context_state = CTX_POSTPONED_ROW;
-  }
-}
-
-
-/*
- * Process some data.
- * Final pass of two-pass quantization: just call the postprocessor.
- * Source data will be the postprocessor controller's internal buffer.
- */
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-METHODDEF(void)
-process_data_crank_post (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			 JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
-			(JDIMENSION *) NULL, (JDIMENSION) 0,
-			output_buf, out_row_ctr, out_rows_avail);
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr mainp;
-  int ci, rgroup, ngroups;
-  jpeg_component_info *compptr;
-
-  mainp = (my_main_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_main_controller));
-  cinfo->main = &mainp->pub;
-  mainp->pub.start_pass = start_pass_main;
-
-  if (need_full_buffer)		/* shouldn't happen */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Allocate the workspace.
-   * ngroups is the number of row groups we need.
-   */
-  if (cinfo->upsample->need_context_rows) {
-    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
-    ngroups = cinfo->min_DCT_v_scaled_size + 2;
-  } else {
-    /* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
-    ngroups = cinfo->min_DCT_v_scaled_size;
-    mainp->rowgroups_avail = (JDIMENSION) ngroups;
-  }
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-       (JDIMENSION) (rgroup * ngroups));
-  }
-}
diff --git a/dep/libjpeg/src/jdmarker.c b/dep/libjpeg/src/jdmarker.c
deleted file mode 100644
index c10fde605..000000000
--- a/dep/libjpeg/src/jdmarker.c
+++ /dev/null
@@ -1,1505 +0,0 @@
-/*
- * jdmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2009-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to decode JPEG datastream markers.
- * Most of the complexity arises from our desire to support input
- * suspension: if not all of the data for a marker is available,
- * we must exit back to the application.  On resumption, we reprocess
- * the marker.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-
-  M_DHT   = 0xc4,
-
-  M_DAC   = 0xcc,
-
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-
-  M_JPG0  = 0xf0,
-  M_JPG8  = 0xf8,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-
-  M_TEM   = 0x01,
-
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_reader pub; /* public fields */
-
-  /* Application-overridable marker processing methods */
-  jpeg_marker_parser_method process_COM;
-  jpeg_marker_parser_method process_APPn[16];
-
-  /* Limit on marker data length to save for each marker type */
-  unsigned int length_limit_COM;
-  unsigned int length_limit_APPn[16];
-
-  /* Status of COM/APPn marker saving */
-  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
-  unsigned int bytes_read;		/* data bytes read so far in marker */
-  /* Note: cur_marker is not linked into marker_list until it's all read. */
-} my_marker_reader;
-
-typedef my_marker_reader * my_marker_ptr;
-
-
-/*
- * Macros for fetching data from the data source module.
- *
- * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
- * the current restart point; we update them only when we have reached a
- * suitable place to restart if a suspension occurs.
- */
-
-/* Declare and initialize local copies of input pointer/count */
-#define INPUT_VARS(cinfo)  \
-	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
-	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
-	size_t bytes_in_buffer = datasrc->bytes_in_buffer
-
-/* Unload the local copies --- do this only at a restart boundary */
-#define INPUT_SYNC(cinfo)  \
-	( datasrc->next_input_byte = next_input_byte,  \
-	  datasrc->bytes_in_buffer = bytes_in_buffer )
-
-/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
-#define INPUT_RELOAD(cinfo)  \
-	( next_input_byte = datasrc->next_input_byte,  \
-	  bytes_in_buffer = datasrc->bytes_in_buffer )
-
-/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
- * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
- * but we must reload the local copies after a successful fill.
- */
-#define MAKE_BYTE_AVAIL(cinfo,action)  \
-	if (bytes_in_buffer == 0) {  \
-	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
-	    { action; }  \
-	  INPUT_RELOAD(cinfo);  \
-	}
-
-/* Read a byte into variable V.
- * If must suspend, take the specified action (typically "return FALSE").
- */
-#define INPUT_BYTE(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = GETJOCTET(*next_input_byte++); )
-
-/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
- * V should be declared unsigned int or perhaps INT32.
- */
-#define INPUT_2BYTES(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
-		  MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V += GETJOCTET(*next_input_byte++); )
-
-
-/*
- * Routines to process JPEG markers.
- *
- * Entry condition: JPEG marker itself has been read and its code saved
- *   in cinfo->unread_marker; input restart point is just after the marker.
- *
- * Exit: if return TRUE, have read and processed any parameters, and have
- *   updated the restart point to point after the parameters.
- *   If return FALSE, was forced to suspend before reaching end of
- *   marker parameters; restart point has not been moved.  Same routine
- *   will be called again after application supplies more input data.
- *
- * This approach to suspension assumes that all of a marker's parameters
- * can fit into a single input bufferload.  This should hold for "normal"
- * markers.  Some COM/APPn markers might have large parameter segments
- * that might not fit.  If we are simply dropping such a marker, we use
- * skip_input_data to get past it, and thereby put the problem on the
- * source manager's shoulders.  If we are saving the marker's contents
- * into memory, we use a slightly different convention: when forced to
- * suspend, the marker processor updates the restart point to the end of
- * what it's consumed (ie, the end of the buffer) before returning FALSE.
- * On resumption, cinfo->unread_marker still contains the marker code,
- * but the data source will point to the next chunk of marker data.
- * The marker processor must retain internal state to deal with this.
- *
- * Note that we don't bother to avoid duplicate trace messages if a
- * suspension occurs within marker parameters.  Other side effects
- * require more care.
- */
-
-
-LOCAL(boolean)
-get_soi (j_decompress_ptr cinfo)
-/* Process an SOI marker */
-{
-  int i;
-  
-  TRACEMS(cinfo, 1, JTRC_SOI);
-
-  if (cinfo->marker->saw_SOI)
-    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
-
-  /* Reset all parameters that are defined to be reset by SOI */
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-  cinfo->restart_interval = 0;
-
-  /* Set initial assumptions for colorspace etc */
-
-  cinfo->jpeg_color_space = JCS_UNKNOWN;
-  cinfo->color_transform = JCT_NONE;
-  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
-
-  cinfo->saw_JFIF_marker = FALSE;
-  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;
-  cinfo->X_density = 1;
-  cinfo->Y_density = 1;
-  cinfo->saw_Adobe_marker = FALSE;
-  cinfo->Adobe_transform = 0;
-
-  cinfo->marker->saw_SOI = TRUE;
-
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
-	 boolean is_arith)
-/* Process a SOFn marker */
-{
-  INT32 length;
-  int c, ci, i;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  cinfo->is_baseline = is_baseline;
-  cinfo->progressive_mode = is_prog;
-  cinfo->arith_code = is_arith;
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
-  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
-
-  length -= 8;
-
-  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
-	   (int) cinfo->image_width, (int) cinfo->image_height,
-	   cinfo->num_components);
-
-  if (cinfo->marker->saw_SOF)
-    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
-
-  /* We don't support files in which the image height is initially specified */
-  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
-  /* might as well have a general sanity check. */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
-      cinfo->num_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  if (length != (cinfo->num_components * 3))
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
-    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 cinfo->num_components * SIZEOF(jpeg_component_info));
-
-  for (ci = 0; ci < cinfo->num_components; ci++) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-    /* Check to see whether component id has already been seen   */
-    /* (in violation of the spec, but unfortunately seen in some */
-    /* files).  If so, create "fake" component id equal to the   */
-    /* max id seen so far + 1. */
-    for (i = 0, compptr = cinfo->comp_info; i < ci; i++, compptr++) {
-      if (c == compptr->component_id) {
-	compptr = cinfo->comp_info;
-	c = compptr->component_id;
-	compptr++;
-	for (i = 1; i < ci; i++, compptr++) {
-	  if (compptr->component_id > c) c = compptr->component_id;
-	}
-	c++;
-	break;
-      }
-    }
-    compptr->component_id = c;
-    compptr->component_index = ci;
-    INPUT_BYTE(cinfo, c, return FALSE);
-    compptr->h_samp_factor = (c >> 4) & 15;
-    compptr->v_samp_factor = (c     ) & 15;
-    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
-
-    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
-	     compptr->component_id, compptr->h_samp_factor,
-	     compptr->v_samp_factor, compptr->quant_tbl_no);
-  }
-
-  cinfo->marker->saw_SOF = TRUE;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sos (j_decompress_ptr cinfo)
-/* Process a SOS marker */
-{
-  INT32 length;
-  int c, ci, i, n;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  if (! cinfo->marker->saw_SOF)
-    ERREXITS(cinfo, JERR_SOF_BEFORE, "SOS");
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
-
-  TRACEMS1(cinfo, 1, JTRC_SOS, n);
-
-  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
-      (n == 0 && !cinfo->progressive_mode))
-      /* pseudo SOS marker only allowed in progressive mode */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  cinfo->comps_in_scan = n;
-
-  /* Collect the component-spec parameters */
-
-  for (i = 0; i < n; i++) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-
-    /* Detect the case where component id's are not unique, and, if so, */
-    /* create a fake component id using the same logic as in get_sof.   */
-    /* Note:  This also ensures that all of the SOF components are      */
-    /* referenced in the single scan case, which prevents access to     */
-    /* uninitialized memory in later decoding stages. */
-    for (ci = 0; ci < i; ci++) {
-      if (c == cinfo->cur_comp_info[ci]->component_id) {
-	c = cinfo->cur_comp_info[0]->component_id;
-	for (ci = 1; ci < i; ci++) {
-	  compptr = cinfo->cur_comp_info[ci];
-	  if (compptr->component_id > c) c = compptr->component_id;
-	}
-	c++;
-	break;
-      }
-    }
-
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (c == compptr->component_id)
-	goto id_found;
-    }
-
-    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, c);
-
-  id_found:
-
-    cinfo->cur_comp_info[i] = compptr;
-    INPUT_BYTE(cinfo, c, return FALSE);
-    compptr->dc_tbl_no = (c >> 4) & 15;
-    compptr->ac_tbl_no = (c     ) & 15;
-
-    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, compptr->component_id,
-	     compptr->dc_tbl_no, compptr->ac_tbl_no);
-  }
-
-  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ss = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Se = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ah = (c >> 4) & 15;
-  cinfo->Al = (c     ) & 15;
-
-  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
-	   cinfo->Ah, cinfo->Al);
-
-  /* Prepare to scan data & restart markers */
-  cinfo->marker->next_restart_num = 0;
-
-  /* Count another (non-pseudo) SOS marker */
-  if (n) cinfo->input_scan_number++;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-#ifdef D_ARITH_CODING_SUPPORTED
-
-LOCAL(boolean)
-get_dac (j_decompress_ptr cinfo)
-/* Process a DAC marker */
-{
-  INT32 length;
-  int index, val;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 0) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-    INPUT_BYTE(cinfo, val, return FALSE);
-
-    length -= 2;
-
-    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
-
-    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
-      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
-
-    if (index >= NUM_ARITH_TBLS) { /* define AC table */
-      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
-    } else {			/* define DC table */
-      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
-      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
-      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
-	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
-    }
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-#else /* ! D_ARITH_CODING_SUPPORTED */
-
-#define get_dac(cinfo)  skip_variable(cinfo)
-
-#endif /* D_ARITH_CODING_SUPPORTED */
-
-
-LOCAL(boolean)
-get_dht (j_decompress_ptr cinfo)
-/* Process a DHT marker */
-{
-  INT32 length;
-  UINT8 bits[17];
-  UINT8 huffval[256];
-  int i, index, count;
-  JHUFF_TBL **htblptr;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 16) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-
-    TRACEMS1(cinfo, 1, JTRC_DHT, index);
-      
-    bits[0] = 0;
-    count = 0;
-    for (i = 1; i <= 16; i++) {
-      INPUT_BYTE(cinfo, bits[i], return FALSE);
-      count += bits[i];
-    }
-
-    length -= 1 + 16;
-
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[1], bits[2], bits[3], bits[4],
-	     bits[5], bits[6], bits[7], bits[8]);
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[9], bits[10], bits[11], bits[12],
-	     bits[13], bits[14], bits[15], bits[16]);
-
-    /* Here we just do minimal validation of the counts to avoid walking
-     * off the end of our table space.  jdhuff.c will check more carefully.
-     */
-    if (count > 256 || ((INT32) count) > length)
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-    for (i = 0; i < count; i++)
-      INPUT_BYTE(cinfo, huffval[i], return FALSE);
-
-    length -= count;
-
-    if (index & 0x10) {		/* AC table definition */
-      index -= 0x10;
-      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
-    } else {			/* DC table definition */
-      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
-    }
-
-    if (index < 0 || index >= NUM_HUFF_TBLS)
-      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
-
-    if (*htblptr == NULL)
-      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-  
-    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-    if (count > 0)
-      MEMCOPY((*htblptr)->huffval, huffval, count * SIZEOF(UINT8));
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dqt (j_decompress_ptr cinfo)
-/* Process a DQT marker */
-{
-  INT32 length, count, i;
-  int n, prec;
-  unsigned int tmp;
-  JQUANT_TBL *quant_ptr;
-  const int *natural_order;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  while (length > 0) {
-    length--;
-    INPUT_BYTE(cinfo, n, return FALSE);
-    prec = n >> 4;
-    n &= 0x0F;
-
-    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
-
-    if (n >= NUM_QUANT_TBLS)
-      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
-      
-    if (cinfo->quant_tbl_ptrs[n] == NULL)
-      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-    quant_ptr = cinfo->quant_tbl_ptrs[n];
-
-    if (prec) {
-      if (length < DCTSIZE2 * 2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length >> 1;
-      } else
-	count = DCTSIZE2;
-    } else {
-      if (length < DCTSIZE2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length;
-      } else
-	count = DCTSIZE2;
-    }
-
-    switch ((int) count) {
-    case (2*2): natural_order = jpeg_natural_order2; break;
-    case (3*3): natural_order = jpeg_natural_order3; break;
-    case (4*4): natural_order = jpeg_natural_order4; break;
-    case (5*5): natural_order = jpeg_natural_order5; break;
-    case (6*6): natural_order = jpeg_natural_order6; break;
-    case (7*7): natural_order = jpeg_natural_order7; break;
-    default:    natural_order = jpeg_natural_order;
-    }
-
-    for (i = 0; i < count; i++) {
-      if (prec)
-	INPUT_2BYTES(cinfo, tmp, return FALSE);
-      else
-	INPUT_BYTE(cinfo, tmp, return FALSE);
-      /* We convert the zigzag-order table to natural array order. */
-      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
-    }
-
-    if (cinfo->err->trace_level >= 2) {
-      for (i = 0; i < DCTSIZE2; i += 8) {
-	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
-		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
-		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
-		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
-		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
-      }
-    }
-
-    length -= count;
-    if (prec) length -= count;
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dri (j_decompress_ptr cinfo)
-/* Process a DRI marker */
-{
-  INT32 length;
-  unsigned int tmp;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  
-  if (length != 4)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-
-  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
-
-  cinfo->restart_interval = tmp;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_lse (j_decompress_ptr cinfo)
-/* Process an LSE marker */
-{
-  INT32 length;
-  unsigned int tmp;
-  int cid;
-  INPUT_VARS(cinfo);
-
-  if (! cinfo->marker->saw_SOF)
-    ERREXITS(cinfo, JERR_SOF_BEFORE, "LSE");
-
-  if (cinfo->num_components < 3) goto bad;
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  if (length != 24)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0x0D)	/* ID inverse transform specification */
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != MAXJSAMPLE) goto bad;		/* MAXTRANS */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 3) goto bad;			/* Nt=3 */
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[1].component_id) goto bad;
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[0].component_id) goto bad;
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[2].component_id) goto bad;
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0x80) goto bad;		/* F1: CENTER1=1, NORM1=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(1,1)=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(1,2)=0 */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;		/* F2: CENTER2=0, NORM2=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 1) goto bad;			/* A(2,1)=1 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(2,2)=0 */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;		/* F3: CENTER3=0, NORM3=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 1) goto bad;			/* A(3,1)=1 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) {				/* A(3,2)=0 */
-    bad:
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-  }
-
-  /* OK, valid transform that we can handle. */
-  cinfo->color_transform = JCT_SUBTRACT_GREEN;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Routines for processing APPn and COM markers.
- * These are either saved in memory or discarded, per application request.
- * APP0 and APP14 are specially checked to see if they are
- * JFIF and Adobe markers, respectively.
- */
-
-#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
-#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
-#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
-
-
-LOCAL(void)
-examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	      unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP0.
- * Take appropriate action if it is a JFIF marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  INT32 totallen = (INT32) datalen + remaining;
-
-  if (datalen >= APP0_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x49 &&
-      GETJOCTET(data[3]) == 0x46 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF APP0 marker: save info */
-    cinfo->saw_JFIF_marker = TRUE;
-    cinfo->JFIF_major_version = GETJOCTET(data[5]);
-    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
-    cinfo->density_unit = GETJOCTET(data[7]);
-    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
-    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
-    /* Check version.
-     * Major version must be 1 or 2, anything else signals an incompatible
-     * change.
-     * (We used to treat this as an error, but now it's a nonfatal warning,
-     * because some bozo at Hijaak couldn't read the spec.)
-     * Minor version should be 0..2, but process anyway if newer.
-     */
-    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
-      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
-	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
-    /* Generate trace messages */
-    TRACEMS5(cinfo, 1, JTRC_JFIF,
-	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
-	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
-    /* Validate thumbnail dimensions and issue appropriate messages */
-    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
-      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-	       GETJOCTET(data[12]), GETJOCTET(data[13]));
-    totallen -= APP0_DATA_LEN;
-    if (totallen !=
-	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
-      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
-  } else if (datalen >= 6 &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x58 &&
-      GETJOCTET(data[3]) == 0x58 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF "JFXX" extension APP0 marker */
-    /* The library doesn't actually do anything with these,
-     * but we try to produce a helpful trace message.
-     */
-    switch (GETJOCTET(data[5])) {
-    case 0x10:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
-      break;
-    case 0x11:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
-      break;
-    case 0x13:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
-      break;
-    default:
-      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-	       GETJOCTET(data[5]), (int) totallen);
-    }
-  } else {
-    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
-  }
-}
-
-
-LOCAL(void)
-examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	       unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP14.
- * Take appropriate action if it is an Adobe marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  unsigned int version, flags0, flags1, transform;
-
-  if (datalen >= APP14_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x41 &&
-      GETJOCTET(data[1]) == 0x64 &&
-      GETJOCTET(data[2]) == 0x6F &&
-      GETJOCTET(data[3]) == 0x62 &&
-      GETJOCTET(data[4]) == 0x65) {
-    /* Found Adobe APP14 marker */
-    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
-    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
-    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
-    transform = GETJOCTET(data[11]);
-    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
-    cinfo->saw_Adobe_marker = TRUE;
-    cinfo->Adobe_transform = (UINT8) transform;
-  } else {
-    /* Start of APP14 does not match "Adobe", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
-  }
-}
-
-
-METHODDEF(boolean)
-get_interesting_appn (j_decompress_ptr cinfo)
-/* Process an APP0 or APP14 marker without saving it */
-{
-  INT32 length;
-  JOCTET b[APPN_DATA_LEN];
-  unsigned int i, numtoread;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  /* get the interesting part of the marker data */
-  if (length >= APPN_DATA_LEN)
-    numtoread = APPN_DATA_LEN;
-  else if (length > 0)
-    numtoread = (unsigned int) length;
-  else
-    numtoread = 0;
-  for (i = 0; i < numtoread; i++)
-    INPUT_BYTE(cinfo, b[i], return FALSE);
-  length -= numtoread;
-
-  /* process it */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  default:
-    /* can't get here unless jpeg_save_markers chooses wrong processor */
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-METHODDEF(boolean)
-save_marker (j_decompress_ptr cinfo)
-/* Save an APPn or COM marker into the marker list */
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
-  unsigned int bytes_read, data_length;
-  JOCTET FAR * data;
-  INT32 length = 0;
-  INPUT_VARS(cinfo);
-
-  if (cur_marker == NULL) {
-    /* begin reading a marker */
-    INPUT_2BYTES(cinfo, length, return FALSE);
-    length -= 2;
-    if (length >= 0) {		/* watch out for bogus length word */
-      /* figure out how much we want to save */
-      unsigned int limit;
-      if (cinfo->unread_marker == (int) M_COM)
-	limit = marker->length_limit_COM;
-      else
-	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
-      if ((unsigned int) length < limit)
-	limit = (unsigned int) length;
-      /* allocate and initialize the marker item */
-      cur_marker = (jpeg_saved_marker_ptr)
-	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				    SIZEOF(struct jpeg_marker_struct) + limit);
-      cur_marker->next = NULL;
-      cur_marker->marker = (UINT8) cinfo->unread_marker;
-      cur_marker->original_length = (unsigned int) length;
-      cur_marker->data_length = limit;
-      /* data area is just beyond the jpeg_marker_struct */
-      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
-      marker->cur_marker = cur_marker;
-      marker->bytes_read = 0;
-      bytes_read = 0;
-      data_length = limit;
-    } else {
-      /* deal with bogus length word */
-      bytes_read = data_length = 0;
-      data = NULL;
-    }
-  } else {
-    /* resume reading a marker */
-    bytes_read = marker->bytes_read;
-    data_length = cur_marker->data_length;
-    data = cur_marker->data + bytes_read;
-  }
-
-  while (bytes_read < data_length) {
-    INPUT_SYNC(cinfo);		/* move the restart point to here */
-    marker->bytes_read = bytes_read;
-    /* If there's not at least one byte in buffer, suspend */
-    MAKE_BYTE_AVAIL(cinfo, return FALSE);
-    /* Copy bytes with reasonable rapidity */
-    while (bytes_read < data_length && bytes_in_buffer > 0) {
-      *data++ = *next_input_byte++;
-      bytes_in_buffer--;
-      bytes_read++;
-    }
-  }
-
-  /* Done reading what we want to read */
-  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
-    /* Add new marker to end of list */
-    if (cinfo->marker_list == NULL) {
-      cinfo->marker_list = cur_marker;
-    } else {
-      jpeg_saved_marker_ptr prev = cinfo->marker_list;
-      while (prev->next != NULL)
-	prev = prev->next;
-      prev->next = cur_marker;
-    }
-    /* Reset pointer & calc remaining data length */
-    data = cur_marker->data;
-    length = cur_marker->original_length - data_length;
-  }
-  /* Reset to initial state for next marker */
-  marker->cur_marker = NULL;
-
-  /* Process the marker if interesting; else just make a generic trace msg */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, data, data_length, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, data, data_length, length);
-    break;
-  default:
-    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
-	     (int) (data_length + length));
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-METHODDEF(boolean)
-skip_variable (j_decompress_ptr cinfo)
-/* Skip over an unknown or uninteresting variable-length marker */
-{
-  INT32 length;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
-
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-/*
- * Find the next JPEG marker, save it in cinfo->unread_marker.
- * Returns FALSE if had to suspend before reaching a marker;
- * in that case cinfo->unread_marker is unchanged.
- *
- * Note that the result might not be a valid marker code,
- * but it will never be 0 or FF.
- */
-
-LOCAL(boolean)
-next_marker (j_decompress_ptr cinfo)
-{
-  int c;
-  INPUT_VARS(cinfo);
-
-  for (;;) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-    /* Skip any non-FF bytes.
-     * This may look a bit inefficient, but it will not occur in a valid file.
-     * We sync after each discarded byte so that a suspending data source
-     * can discard the byte from its buffer.
-     */
-    while (c != 0xFF) {
-      cinfo->marker->discarded_bytes++;
-      INPUT_SYNC(cinfo);
-      INPUT_BYTE(cinfo, c, return FALSE);
-    }
-    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
-     * pad bytes, so don't count them in discarded_bytes.  We assume there
-     * will not be so many consecutive FF bytes as to overflow a suspending
-     * data source's input buffer.
-     */
-    do {
-      INPUT_BYTE(cinfo, c, return FALSE);
-    } while (c == 0xFF);
-    if (c != 0)
-      break;			/* found a valid marker, exit loop */
-    /* Reach here if we found a stuffed-zero data sequence (FF/00).
-     * Discard it and loop back to try again.
-     */
-    cinfo->marker->discarded_bytes += 2;
-    INPUT_SYNC(cinfo);
-  }
-
-  if (cinfo->marker->discarded_bytes != 0) {
-    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
-    cinfo->marker->discarded_bytes = 0;
-  }
-
-  cinfo->unread_marker = c;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-first_marker (j_decompress_ptr cinfo)
-/* Like next_marker, but used to obtain the initial SOI marker. */
-/* For this marker, we do not allow preceding garbage or fill; otherwise,
- * we might well scan an entire input file before realizing it ain't JPEG.
- * If an application wants to process non-JFIF files, it must seek to the
- * SOI before calling the JPEG library.
- */
-{
-  int c, c2;
-  INPUT_VARS(cinfo);
-
-  INPUT_BYTE(cinfo, c, return FALSE);
-  INPUT_BYTE(cinfo, c2, return FALSE);
-  if (c != 0xFF || c2 != (int) M_SOI)
-    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
-
-  cinfo->unread_marker = c2;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Read markers until SOS or EOI.
- *
- * Returns same codes as are defined for jpeg_consume_input:
- * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * Note: This function may return a pseudo SOS marker (with zero
- * component number) for treat by input controller's consume_input.
- * consume_input itself should filter out (skip) the pseudo marker
- * after processing for the caller.
- */
-
-METHODDEF(int)
-read_markers (j_decompress_ptr cinfo)
-{
-  /* Outer loop repeats once for each marker. */
-  for (;;) {
-    /* Collect the marker proper, unless we already did. */
-    /* NB: first_marker() enforces the requirement that SOI appear first. */
-    if (cinfo->unread_marker == 0) {
-      if (! cinfo->marker->saw_SOI) {
-	if (! first_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      } else {
-	if (! next_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      }
-    }
-    /* At this point cinfo->unread_marker contains the marker code and the
-     * input point is just past the marker proper, but before any parameters.
-     * A suspension will cause us to return with this state still true.
-     */
-    switch (cinfo->unread_marker) {
-    case M_SOI:
-      if (! get_soi(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF0:		/* Baseline */
-      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF1:		/* Extended sequential, Huffman */
-      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF2:		/* Progressive, Huffman */
-      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF9:		/* Extended sequential, arithmetic */
-      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF10:		/* Progressive, arithmetic */
-      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    /* Currently unsupported SOFn types */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_JPG:			/* Reserved for JPEG extensions */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
-      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
-      break;
-
-    case M_SOS:
-      if (! get_sos(cinfo))
-	return JPEG_SUSPENDED;
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_SOS;
-
-    case M_EOI:
-      TRACEMS(cinfo, 1, JTRC_EOI);
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_EOI;
-
-    case M_DAC:
-      if (! get_dac(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DHT:
-      if (! get_dht(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DQT:
-      if (! get_dqt(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DRI:
-      if (! get_dri(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_JPG8:
-      if (! get_lse(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_APP0:
-    case M_APP1:
-    case M_APP2:
-    case M_APP3:
-    case M_APP4:
-    case M_APP5:
-    case M_APP6:
-    case M_APP7:
-    case M_APP8:
-    case M_APP9:
-    case M_APP10:
-    case M_APP11:
-    case M_APP12:
-    case M_APP13:
-    case M_APP14:
-    case M_APP15:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
-		cinfo->unread_marker - (int) M_APP0]) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_COM:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_RST0:		/* these are all parameterless */
-    case M_RST1:
-    case M_RST2:
-    case M_RST3:
-    case M_RST4:
-    case M_RST5:
-    case M_RST6:
-    case M_RST7:
-    case M_TEM:
-      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
-      break;
-
-    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
-      if (! skip_variable(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    default:			/* must be DHP, EXP, JPGn, or RESn */
-      /* For now, we treat the reserved markers as fatal errors since they are
-       * likely to be used to signal incompatible JPEG Part 3 extensions.
-       * Once the JPEG 3 version-number marker is well defined, this code
-       * ought to change!
-       */
-      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-    }
-    /* Successfully processed marker, so reset state variable */
-    cinfo->unread_marker = 0;
-  } /* end loop */
-}
-
-
-/*
- * Read a restart marker, which is expected to appear next in the datastream;
- * if the marker is not there, take appropriate recovery action.
- * Returns FALSE if suspension is required.
- *
- * This is called by the entropy decoder after it has read an appropriate
- * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
- * has already read a marker from the data source.  Under normal conditions
- * cinfo->unread_marker will be reset to 0 before returning; if not reset,
- * it holds a marker which the decoder will be unable to read past.
- */
-
-METHODDEF(boolean)
-read_restart_marker (j_decompress_ptr cinfo)
-{
-  /* Obtain a marker unless we already did. */
-  /* Note that next_marker will complain if it skips any data. */
-  if (cinfo->unread_marker == 0) {
-    if (! next_marker(cinfo))
-      return FALSE;
-  }
-
-  if (cinfo->unread_marker ==
-      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
-    /* Normal case --- swallow the marker and let entropy decoder continue */
-    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
-    cinfo->unread_marker = 0;
-  } else {
-    /* Uh-oh, the restart markers have been messed up. */
-    /* Let the data source manager determine how to resync. */
-    if (! (*cinfo->src->resync_to_restart) (cinfo,
-					    cinfo->marker->next_restart_num))
-      return FALSE;
-  }
-
-  /* Update next-restart state */
-  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
-
-  return TRUE;
-}
-
-
-/*
- * This is the default resync_to_restart method for data source managers
- * to use if they don't have any better approach.  Some data source managers
- * may be able to back up, or may have additional knowledge about the data
- * which permits a more intelligent recovery strategy; such managers would
- * presumably supply their own resync method.
- *
- * read_restart_marker calls resync_to_restart if it finds a marker other than
- * the restart marker it was expecting.  (This code is *not* used unless
- * a nonzero restart interval has been declared.)  cinfo->unread_marker is
- * the marker code actually found (might be anything, except 0 or FF).
- * The desired restart marker number (0..7) is passed as a parameter.
- * This routine is supposed to apply whatever error recovery strategy seems
- * appropriate in order to position the input stream to the next data segment.
- * Note that cinfo->unread_marker is treated as a marker appearing before
- * the current data-source input point; usually it should be reset to zero
- * before returning.
- * Returns FALSE if suspension is required.
- *
- * This implementation is substantially constrained by wanting to treat the
- * input as a data stream; this means we can't back up.  Therefore, we have
- * only the following actions to work with:
- *   1. Simply discard the marker and let the entropy decoder resume at next
- *      byte of file.
- *   2. Read forward until we find another marker, discarding intervening
- *      data.  (In theory we could look ahead within the current bufferload,
- *      without having to discard data if we don't find the desired marker.
- *      This idea is not implemented here, in part because it makes behavior
- *      dependent on buffer size and chance buffer-boundary positions.)
- *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
- *      This will cause the entropy decoder to process an empty data segment,
- *      inserting dummy zeroes, and then we will reprocess the marker.
- *
- * #2 is appropriate if we think the desired marker lies ahead, while #3 is
- * appropriate if the found marker is a future restart marker (indicating
- * that we have missed the desired restart marker, probably because it got
- * corrupted).
- * We apply #2 or #3 if the found marker is a restart marker no more than
- * two counts behind or ahead of the expected one.  We also apply #2 if the
- * found marker is not a legal JPEG marker code (it's certainly bogus data).
- * If the found marker is a restart marker more than 2 counts away, we do #1
- * (too much risk that the marker is erroneous; with luck we will be able to
- * resync at some future point).
- * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
- * overrunning the end of a scan.  An implementation limited to single-scan
- * files might find it better to apply #2 for markers other than EOI, since
- * any other marker would have to be bogus data in that case.
- */
-
-GLOBAL(boolean)
-jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
-{
-  int marker = cinfo->unread_marker;
-  int action = 1;
-  
-  /* Always put up a warning. */
-  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
-  
-  /* Outer loop handles repeated decision after scanning forward. */
-  for (;;) {
-    if (marker < (int) M_SOF0)
-      action = 2;		/* invalid marker */
-    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
-      action = 3;		/* valid non-restart marker */
-    else {
-      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
-	  marker == ((int) M_RST0 + ((desired+2) & 7)))
-	action = 3;		/* one of the next two expected restarts */
-      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
-	       marker == ((int) M_RST0 + ((desired-2) & 7)))
-	action = 2;		/* a prior restart, so advance */
-      else
-	action = 1;		/* desired restart or too far away */
-    }
-    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
-    switch (action) {
-    case 1:
-      /* Discard marker and let entropy decoder resume processing. */
-      cinfo->unread_marker = 0;
-      return TRUE;
-    case 2:
-      /* Scan to the next marker, and repeat the decision loop. */
-      if (! next_marker(cinfo))
-	return FALSE;
-      marker = cinfo->unread_marker;
-      break;
-    case 3:
-      /* Return without advancing past this marker. */
-      /* Entropy decoder will be forced to process an empty segment. */
-      return TRUE;
-    }
-  } /* end loop */
-}
-
-
-/*
- * Reset marker processing state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  cinfo->comp_info = NULL;		/* until allocated by get_sof */
-  cinfo->input_scan_number = 0;		/* no SOS seen yet */
-  cinfo->unread_marker = 0;		/* no pending marker */
-  marker->pub.saw_SOI = FALSE;		/* set internal state too */
-  marker->pub.saw_SOF = FALSE;
-  marker->pub.discarded_bytes = 0;
-  marker->cur_marker = NULL;
-}
-
-
-/*
- * Initialize the marker reader module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker;
-  int i;
-
-  /* Create subobject in permanent pool */
-  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_marker_reader));
-  cinfo->marker = &marker->pub;
-  /* Initialize public method pointers */
-  marker->pub.reset_marker_reader = reset_marker_reader;
-  marker->pub.read_markers = read_markers;
-  marker->pub.read_restart_marker = read_restart_marker;
-  /* Initialize COM/APPn processing.
-   * By default, we examine and then discard APP0 and APP14,
-   * but simply discard COM and all other APPn.
-   */
-  marker->process_COM = skip_variable;
-  marker->length_limit_COM = 0;
-  for (i = 0; i < 16; i++) {
-    marker->process_APPn[i] = skip_variable;
-    marker->length_limit_APPn[i] = 0;
-  }
-  marker->process_APPn[0] = get_interesting_appn;
-  marker->process_APPn[14] = get_interesting_appn;
-  /* Reset marker processing state */
-  reset_marker_reader(cinfo);
-}
-
-
-/*
- * Control saving of COM and APPn markers into marker_list.
- */
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-GLOBAL(void)
-jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
-		   unsigned int length_limit)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  long maxlength;
-  jpeg_marker_parser_method processor;
-
-  /* Length limit mustn't be larger than what we can allocate
-   * (should only be a concern in a 16-bit environment).
-   */
-  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
-  if (((long) length_limit) > maxlength)
-    length_limit = (unsigned int) maxlength;
-
-  /* Choose processor routine to use.
-   * APP0/APP14 have special requirements.
-   */
-  if (length_limit) {
-    processor = save_marker;
-    /* If saving APP0/APP14, save at least enough for our internal use. */
-    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
-      length_limit = APP0_DATA_LEN;
-    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
-      length_limit = APP14_DATA_LEN;
-  } else {
-    processor = skip_variable;
-    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
-    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
-      processor = get_interesting_appn;
-  }
-
-  if (marker_code == (int) M_COM) {
-    marker->process_COM = processor;
-    marker->length_limit_COM = length_limit;
-  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
-    marker->process_APPn[marker_code - (int) M_APP0] = processor;
-    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
-  } else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-/*
- * Install a special processing method for COM or APPn markers.
- */
-
-GLOBAL(void)
-jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
-			   jpeg_marker_parser_method routine)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  if (marker_code == (int) M_COM)
-    marker->process_COM = routine;
-  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
-    marker->process_APPn[marker_code - (int) M_APP0] = routine;
-  else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
diff --git a/dep/libjpeg/src/jdmaster.c b/dep/libjpeg/src/jdmaster.c
deleted file mode 100644
index 3070b7bb4..000000000
--- a/dep/libjpeg/src/jdmaster.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * jdmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG decompressor.
- * These routines are concerned with selecting the modules to be executed
- * and with determining the number of passes and the work to be done in each
- * pass.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_decomp_master pub; /* public fields */
-
-  int pass_number;		/* # of passes completed */
-
-  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
-
-  /* Saved references to initialized quantizer modules,
-   * in case we need to switch modes.
-   */
-  struct jpeg_color_quantizer * quantizer_1pass;
-  struct jpeg_color_quantizer * quantizer_2pass;
-} my_decomp_master;
-
-typedef my_decomp_master * my_master_ptr;
-
-
-/*
- * Determine whether merged upsample/color conversion should be used.
- * CRUCIAL: this must match the actual capabilities of jdmerge.c!
- */
-
-LOCAL(boolean)
-use_merged_upsample (j_decompress_ptr cinfo)
-{
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-  /* Merging is the equivalent of plain box-filter upsampling. */
-  /* The following condition is only needed if fancy shall select
-   * a different upsampling method.  In our current implementation
-   * fancy only affects the DCT scaling, thus we can use fancy
-   * upsampling and merged upsample simultaneously, in particular
-   * with scaled DCT sizes larger than the default DCTSIZE.
-   */
-#if 0
-  if (cinfo->do_fancy_upsampling)
-    return FALSE;
-#endif
-  if (cinfo->CCIR601_sampling)
-    return FALSE;
-  /* jdmerge.c only supports YCC=>RGB color conversion */
-  if ((cinfo->jpeg_color_space != JCS_YCbCr &&
-       cinfo->jpeg_color_space != JCS_BG_YCC) ||
-      cinfo->num_components != 3 ||
-      cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE ||
-      cinfo->color_transform)
-    return FALSE;
-  /* and it only handles 2h1v or 2h2v sampling ratios */
-  if (cinfo->comp_info[0].h_samp_factor != 2 ||
-      cinfo->comp_info[1].h_samp_factor != 1 ||
-      cinfo->comp_info[2].h_samp_factor != 1 ||
-      cinfo->comp_info[0].v_samp_factor >  2 ||
-      cinfo->comp_info[1].v_samp_factor != 1 ||
-      cinfo->comp_info[2].v_samp_factor != 1)
-    return FALSE;
-  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
-  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
-    return FALSE;
-  /* ??? also need to test for upsample-time rescaling, when & if supported */
-  return TRUE;			/* by golly, it'll work... */
-#else
-  return FALSE;
-#endif
-}
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- * Also note that it may be called before the master module is initialized!
- */
-
-GLOBAL(void)
-jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for full decompression.
- */
-{
-  int ci, i;
-  jpeg_component_info *compptr;
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_READY)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Compute core output image dimensions and DCT scaling choices. */
-  jpeg_core_output_dimensions(cinfo);
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-  /* In selecting the actual DCT scaling for each component, we try to
-   * scale up the chroma components via IDCT scaling rather than upsampling.
-   * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code adapts subsampling ratios which are powers of 2.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    int ssize = 1;
-    if (! cinfo->raw_data_out)
-      while (cinfo->min_DCT_h_scaled_size * ssize <=
-	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-    if (! cinfo->raw_data_out)
-      while (cinfo->min_DCT_v_scaled_size * ssize <=
-	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support IDCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-
-    /* Recompute downsampled dimensions of components;
-     * application needs to know these if using raw downsampled data.
-     */
-    /* Size in samples, after IDCT scaling */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-  }
-
-#endif /* IDCT_SCALING_SUPPORTED */
-
-  /* Report number of components in selected colorspace. */
-  /* This should correspond to the actual code in the color conversion module. */
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    break;
-  case JCS_RGB:
-  case JCS_BG_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    break;
-  default:	/* YCCK <=> CMYK conversion or same colorspace as in file */
-    i = 0;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++)
-      if (compptr->component_needed)
-	i++;	/* count output color components */
-    cinfo->out_color_components = i;
-  }
-  cinfo->output_components = (cinfo->quantize_colors ? 1 :
-			      cinfo->out_color_components);
-
-  /* See if upsampler will want to emit more than one row at a time */
-  if (use_merged_upsample(cinfo))
-    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
-  else
-    cinfo->rec_outbuf_height = 1;
-}
-
-
-/*
- * Several decompression processes need to range-limit values to the range
- * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
- * due to noise introduced by quantization, roundoff error, etc.  These
- * processes are inner loops and need to be as fast as possible.  On most
- * machines, particularly CPUs with pipelines or instruction prefetch,
- * a (subscript-check-less) C table lookup
- *		x = sample_range_limit[x];
- * is faster than explicit tests
- *		if (x < 0)  x = 0;
- *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
- * These processes all use a common table prepared by the routine below.
- *
- * For most steps we can mathematically guarantee that the initial value
- * of x is within 2*(MAXJSAMPLE+1) of the legal range, so a table running
- * from -2*(MAXJSAMPLE+1) to 3*MAXJSAMPLE+2 is sufficient.  But for the
- * initial limiting step (just after the IDCT), a wildly out-of-range value
- * is possible if the input data is corrupt.  To avoid any chance of indexing
- * off the end of memory and getting a bad-pointer trap, we perform the
- * post-IDCT limiting thus:
- *		x = (sample_range_limit - SUBSET)[(x + CENTER) & MASK];
- * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
- * samples.  Under normal circumstances this is more than enough range and
- * a correct output will be generated; with bogus input data the mask will
- * cause wraparound, and we will safely generate a bogus-but-in-range output.
- * For the post-IDCT step, we want to convert the data from signed to unsigned
- * representation by adding CENTERJSAMPLE at the same time that we limit it.
- * This is accomplished with SUBSET = CENTER - CENTERJSAMPLE.
- *
- * Note that the table is allocated in near data space on PCs; it's small
- * enough and used often enough to justify this.
- */
-
-LOCAL(void)
-prepare_range_limit_table (j_decompress_ptr cinfo)
-/* Allocate and fill in the sample_range_limit table */
-{
-  JSAMPLE * table;
-  int i;
-
-  table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
-    JPOOL_IMAGE, (RANGE_CENTER * 2 + MAXJSAMPLE + 1) * SIZEOF(JSAMPLE));
-  /* First segment of range limit table: limit[x] = 0 for x < 0 */
-  MEMZERO(table, RANGE_CENTER * SIZEOF(JSAMPLE));
-  table += RANGE_CENTER;	/* allow negative subscripts of table */
-  cinfo->sample_range_limit = table;
-  /* Main part of range limit table: limit[x] = x */
-  for (i = 0; i <= MAXJSAMPLE; i++)
-    table[i] = (JSAMPLE) i;
-  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
-  for (; i <=  MAXJSAMPLE + RANGE_CENTER; i++)
-    table[i] = MAXJSAMPLE;
-}
-
-
-/*
- * Master selection of decompression modules.
- * This is done once at jpeg_start_decompress time.  We determine
- * which modules will be used and give them appropriate initialization calls.
- * We also initialize the decompressor input side to begin consuming data.
- *
- * Since jpeg_read_header has finished, we know what is in the SOF
- * and (first) SOS markers.  We also have all the application parameter
- * settings.
- */
-
-LOCAL(void)
-master_selection (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-  boolean use_c_buffer;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Initialize dimensions and other stuff */
-  jpeg_calc_output_dimensions(cinfo);
-  prepare_range_limit_table(cinfo);
-
-  /* Sanity check on image dimensions */
-  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
-      cinfo->out_color_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Width of an output scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* Initialize my private state */
-  master->pass_number = 0;
-  master->using_merged_upsample = use_merged_upsample(cinfo);
-
-  /* Color quantizer selection */
-  master->quantizer_1pass = NULL;
-  master->quantizer_2pass = NULL;
-  /* No mode changes if not using buffered-image mode. */
-  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
-    cinfo->enable_1pass_quant = FALSE;
-    cinfo->enable_external_quant = FALSE;
-    cinfo->enable_2pass_quant = FALSE;
-  }
-  if (cinfo->quantize_colors) {
-    if (cinfo->raw_data_out)
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    /* 2-pass quantizer only works in 3-component color space. */
-    if (cinfo->out_color_components != 3) {
-      cinfo->enable_1pass_quant = TRUE;
-      cinfo->enable_external_quant = FALSE;
-      cinfo->enable_2pass_quant = FALSE;
-      cinfo->colormap = NULL;
-    } else if (cinfo->colormap != NULL) {
-      cinfo->enable_external_quant = TRUE;
-    } else if (cinfo->two_pass_quantize) {
-      cinfo->enable_2pass_quant = TRUE;
-    } else {
-      cinfo->enable_1pass_quant = TRUE;
-    }
-
-    if (cinfo->enable_1pass_quant) {
-#ifdef QUANT_1PASS_SUPPORTED
-      jinit_1pass_quantizer(cinfo);
-      master->quantizer_1pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-
-    /* We use the 2-pass code to map to external colormaps. */
-    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
-#ifdef QUANT_2PASS_SUPPORTED
-      jinit_2pass_quantizer(cinfo);
-      master->quantizer_2pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-    /* If both quantizers are initialized, the 2-pass one is left active;
-     * this is necessary for starting with quantization to an external map.
-     */
-  }
-
-  /* Post-processing: in particular, color conversion first */
-  if (! cinfo->raw_data_out) {
-    if (master->using_merged_upsample) {
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-      jinit_merged_upsampler(cinfo); /* does color conversion too */
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    } else {
-      jinit_color_deconverter(cinfo);
-      jinit_upsampler(cinfo);
-    }
-    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
-  }
-  /* Inverse DCT */
-  jinit_inverse_dct(cinfo);
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Initialize principal buffer controllers. */
-  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
-  jinit_d_coef_controller(cinfo, use_c_buffer);
-
-  if (! cinfo->raw_data_out)
-    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* If jpeg_start_decompress will read the whole file, initialize
-   * progress monitoring appropriately.  The input step is counted
-   * as one pass.
-   */
-  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
-      cinfo->inputctl->has_multiple_scans) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
-    /* Count the input pass as done */
-    master->pass_number++;
-  }
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each output pass.  We determine which
- * modules will be active during this pass and give them appropriate
- * start_pass calls.  We also set is_dummy_pass to indicate whether this
- * is a "real" output pass or a dummy pass for color quantization.
- * (In the latter case, jdapistd.c will crank the pass to completion.)
- */
-
-METHODDEF(void)
-prepare_for_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (master->pub.is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Final pass of 2-pass quantization */
-    master->pub.is_dummy_pass = FALSE;
-    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
-    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
-    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  } else {
-    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
-      /* Select new quantization method */
-      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
-	cinfo->cquantize = master->quantizer_2pass;
-	master->pub.is_dummy_pass = TRUE;
-      } else if (cinfo->enable_1pass_quant) {
-	cinfo->cquantize = master->quantizer_1pass;
-      } else {
-	ERREXIT(cinfo, JERR_MODE_CHANGE);
-      }
-    }
-    (*cinfo->idct->start_pass) (cinfo);
-    (*cinfo->coef->start_output_pass) (cinfo);
-    if (! cinfo->raw_data_out) {
-      if (! master->using_merged_upsample)
-	(*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->upsample->start_pass) (cinfo);
-      if (cinfo->quantize_colors)
-	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
-      (*cinfo->post->start_pass) (cinfo,
-	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-  }
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->pass_number +
-				    (master->pub.is_dummy_pass ? 2 : 1);
-    /* In buffered-image mode, we assume one more output pass if EOI not
-     * yet reached, but no more passes if EOI has been reached.
-     */
-    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
-      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
-    }
-  }
-}
-
-
-/*
- * Finish up at end of an output pass.
- */
-
-METHODDEF(void)
-finish_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (cinfo->quantize_colors)
-    (*cinfo->cquantize->finish_pass) (cinfo);
-  master->pass_number++;
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-GLOBAL(void)
-jpeg_new_colormap (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_BUFIMAGE)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
-      cinfo->colormap != NULL) {
-    /* Select 2-pass quantizer for external colormap use */
-    cinfo->cquantize = master->quantizer_2pass;
-    /* Notify quantizer of colormap change */
-    (*cinfo->cquantize->new_color_map) (cinfo);
-    master->pub.is_dummy_pass = FALSE; /* just in case */
-  } else
-    ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-/*
- * Initialize master decompression control and select active modules.
- * This is performed at the start of jpeg_start_decompress.
- */
-
-GLOBAL(void)
-jinit_master_decompress (j_decompress_ptr cinfo)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_decomp_master));
-  cinfo->master = &master->pub;
-  master->pub.prepare_for_output_pass = prepare_for_output_pass;
-  master->pub.finish_output_pass = finish_output_pass;
-
-  master->pub.is_dummy_pass = FALSE;
-
-  master_selection(cinfo);
-}
diff --git a/dep/libjpeg/src/jdmerge.c b/dep/libjpeg/src/jdmerge.c
deleted file mode 100644
index 0d16821be..000000000
--- a/dep/libjpeg/src/jdmerge.c
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * jdmerge.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2013-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains code for merged upsampling/color conversion.
- *
- * This file combines functions from jdsample.c and jdcolor.c;
- * read those files first to understand what's going on.
- *
- * When the chroma components are to be upsampled by simple replication
- * (ie, box filtering), we can save some work in color conversion by
- * calculating all the output pixels corresponding to a pair of chroma
- * samples at one time.  In the conversion equations
- *	R = Y           + K1 * Cr
- *	G = Y + K2 * Cb + K3 * Cr
- *	B = Y + K4 * Cb
- * only the Y term varies among the group of pixels corresponding to a pair
- * of chroma samples, so the rest of the terms can be calculated just once.
- * At typical sampling ratios, this eliminates half or three-quarters
- * of the multiplications needed for color conversion.
- *
- * This file currently provides implementations for the following cases:
- *	YCC => RGB color conversion only (YCbCr or BG_YCC).
- *	Sampling ratios of 2h1v or 2h2v.
- *	No scaling needed at upsample time.
- *	Corner-aligned (non-CCIR601) sampling alignment.
- * Other special cases could be added, but in most applications these
- * are the only common cases.  (For uncommon cases we fall back on
- * the more general code in jdsample.c and jdcolor.c.)
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-
-
-#if RANGE_BITS < 2
-  /* Deliberate syntax err */
-  Sorry, this code requires 2 or more range extension bits.
-#endif
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Pointer to routine to do actual upsampling/conversion of one row group */
-  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-			   JSAMPARRAY output_buf));
-
-  /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* For 2:1 vertical sampling, we produce two output rows at a time.
-   * We need a "spare" row buffer to hold the second output row if the
-   * application provides just a one-row buffer; we also use the spare
-   * to discard the dummy last row if the image height is odd.
-   */
-  JSAMPROW spare_row;
-  boolean spare_full;		/* T if spare buffer is occupied */
-
-  JDIMENSION out_row_width;	/* samples per output row */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-
-/*
- * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
- * This is taken directly from jdcolor.c; see that file for more info.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Normal case, sYCC */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.402 * x */
-    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.772 * x */
-    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.714136286 * x */
-    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
-    /* Cb=>G value is scaled-up -0.344136286 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
-  }
-}
-
-
-LOCAL(void)
-build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Wide gamut case, bg-sYCC */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 2.804 * x */
-    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 3.544 * x */
-    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -1.428272572 * x */
-    upsample->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
-    /* Cb=>G value is scaled-up -0.688272572 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_merged_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the spare buffer empty */
-  upsample->spare_full = FALSE;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * The control routine just handles the row buffering considerations.
- */
-
-METHODDEF(void)
-merged_2v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 2:1 vertical sampling case: may need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPROW work_ptrs[2];
-  JDIMENSION num_rows;		/* number of rows returned to caller */
-
-  if (upsample->spare_full) {
-    /* If we have a spare row saved from a previous cycle, just return it. */
-    jcopy_sample_rows(& upsample->spare_row, output_buf + *out_row_ctr,
-		      1, upsample->out_row_width);
-    num_rows = 1;
-    upsample->spare_full = FALSE;
-  } else {
-    /* Figure number of rows to return to caller. */
-    num_rows = 2;
-    /* Not more than the distance to the end of the image. */
-    if (num_rows > upsample->rows_to_go)
-      num_rows = upsample->rows_to_go;
-    /* And not more than what the client can accept: */
-    out_rows_avail -= *out_row_ctr;
-    if (num_rows > out_rows_avail)
-      num_rows = out_rows_avail;
-    /* Create output pointer array for upsampler. */
-    work_ptrs[0] = output_buf[*out_row_ctr];
-    if (num_rows > 1) {
-      work_ptrs[1] = output_buf[*out_row_ctr + 1];
-    } else {
-      work_ptrs[1] = upsample->spare_row;
-      upsample->spare_full = TRUE;
-    }
-    /* Now do the upsampling. */
-    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
-  }
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (! upsample->spare_full)
-    (*in_row_group_ctr)++;
-}
-
-
-METHODDEF(void)
-merged_1v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 1:1 vertical sampling case: much easier, never need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Just do the upsampling. */
-  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
-			 output_buf + *out_row_ctr);
-  /* Adjust counts */
-  (*out_row_ctr)++;
-  (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by the control routines to do
- * the actual upsampling/conversion.  One row group is processed per call.
- *
- * Note: since we may be writing directly into application-supplied buffers,
- * we have to be honest about the output width; we can't assume the buffer
- * has been rounded up to an even width.
- */
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
- */
-
-METHODDEF(void)
-h2v1_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED]   = range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE]  = range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED]   = range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE]  = range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    y  = GETJSAMPLE(*inptr0);
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
-    outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-    outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
-  }
-}
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
- */
-
-METHODDEF(void)
-h2v2_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    y  = GETJSAMPLE(*inptr00);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    y  = GETJSAMPLE(*inptr01);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-  }
-}
-
-
-/*
- * Module initialization routine for merged upsampling/color conversion.
- *
- * NB: this is called under the conditions determined by use_merged_upsample()
- * in jdmaster.c.  That routine MUST correspond to the actual capabilities
- * of this module; no safety checks are made here.
- */
-
-GLOBAL(void)
-jinit_merged_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-
-  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
-  cinfo->upsample = &upsample->pub;
-  upsample->pub.start_pass = start_pass_merged_upsample;
-  upsample->pub.need_context_rows = FALSE;
-
-  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
-
-  if (cinfo->max_v_samp_factor == 2) {
-    upsample->pub.upsample = merged_2v_upsample;
-    upsample->upmethod = h2v2_merged_upsample;
-    /* Allocate a spare row buffer */
-    upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (size_t) upsample->out_row_width * SIZEOF(JSAMPLE));
-  } else {
-    upsample->pub.upsample = merged_1v_upsample;
-    upsample->upmethod = h2v1_merged_upsample;
-    /* No spare row needed */
-    upsample->spare_row = NULL;
-  }
-
-  if (cinfo->jpeg_color_space == JCS_BG_YCC)
-    build_bg_ycc_rgb_table(cinfo);
-  else
-    build_ycc_rgb_table(cinfo);
-}
-
-#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/dep/libjpeg/src/jdpostct.c b/dep/libjpeg/src/jdpostct.c
deleted file mode 100644
index 571563d72..000000000
--- a/dep/libjpeg/src/jdpostct.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * jdpostct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the decompression postprocessing controller.
- * This controller manages the upsampling, color conversion, and color
- * quantization/reduction steps; specifically, it controls the buffering
- * between upsample/color conversion and color quantization/reduction.
- *
- * If no color quantization/reduction is required, then this module has no
- * work to do, and it just hands off to the upsample/color conversion code.
- * An integrated upsample/convert/quantize process would replace this module
- * entirely.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_post_controller pub; /* public fields */
-
-  /* Color quantization source buffer: this holds output data from
-   * the upsample/color conversion step to be passed to the quantizer.
-   * For two-pass color quantization, we need a full-image buffer;
-   * for one-pass operation, a strip buffer is sufficient.
-   */
-  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
-  JDIMENSION strip_height;	/* buffer size in rows */
-  /* for two-pass mode only: */
-  JDIMENSION starting_row;	/* row # of first row in current strip */
-  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
-} my_post_controller;
-
-typedef my_post_controller * my_post_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) post_process_1pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) post_process_prepass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-METHODDEF(void) post_process_2pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->quantize_colors) {
-      /* Single-pass processing with color quantization. */
-      post->pub.post_process_data = post_process_1pass;
-      /* We could be doing buffered-image output before starting a 2-pass
-       * color quantization; in that case, jinit_d_post_controller did not
-       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
-       */
-      if (post->buffer == NULL) {
-	post->buffer = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, post->whole_image,
-	   (JDIMENSION) 0, post->strip_height, TRUE);
-      }
-    } else {
-      /* For single-pass processing without color quantization,
-       * I have no work to do; just call the upsampler directly.
-       */
-      post->pub.post_process_data = cinfo->upsample->upsample;
-    }
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    /* First pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_prepass;
-    break;
-  case JBUF_CRANK_DEST:
-    /* Second pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_2pass;
-    break;
-#endif /* QUANT_2PASS_SUPPORTED */
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-  post->starting_row = post->next_row = 0;
-}
-
-
-/*
- * Process some data in the one-pass (strip buffer) case.
- * This is used for color precision reduction as well as one-pass quantization.
- */
-
-METHODDEF(void)
-post_process_1pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Fill the buffer, but not more than what we can dump out in one go. */
-  /* Note we rely on the upsampler to detect bottom of image. */
-  max_rows = out_rows_avail - *out_row_ctr;
-  if (max_rows > post->strip_height)
-    max_rows = post->strip_height;
-  num_rows = 0;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &num_rows, max_rows);
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
-  *out_row_ctr += num_rows;
-}
-
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-/*
- * Process some data in the first pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_prepass (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		      JDIMENSION in_row_groups_avail,
-		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		      JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION old_next_row, num_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, TRUE);
-  }
-
-  /* Upsample some data (up to a strip height's worth). */
-  old_next_row = post->next_row;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &post->next_row, post->strip_height);
-
-  /* Allow quantizer to scan new data.  No data is emitted, */
-  /* but we advance out_row_ctr so outer loop can tell when we're done. */
-  if (post->next_row > old_next_row) {
-    num_rows = post->next_row - old_next_row;
-    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-					 (JSAMPARRAY) NULL, (int) num_rows);
-    *out_row_ctr += num_rows;
-  }
-
-  /* Advance if we filled the strip. */
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-
-/*
- * Process some data in the second pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_2pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, FALSE);
-  }
-
-  /* Determine number of rows to emit. */
-  num_rows = post->strip_height - post->next_row; /* available in strip */
-  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-  /* We have to check bottom of image here, can't depend on upsampler. */
-  max_rows = cinfo->output_height - post->starting_row;
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer + post->next_row, output_buf + *out_row_ctr,
-		(int) num_rows);
-  *out_row_ctr += num_rows;
-
-  /* Advance if we filled the strip. */
-  post->next_row += num_rows;
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize postprocessing controller.
- */
-
-GLOBAL(void)
-jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_post_ptr post;
-
-  post = (my_post_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_post_controller));
-  cinfo->post = (struct jpeg_d_post_controller *) post;
-  post->pub.start_pass = start_pass_dpost;
-  post->whole_image = NULL;	/* flag for no virtual arrays */
-  post->buffer = NULL;		/* flag for no strip buffer */
-
-  /* Create the quantization buffer, if needed */
-  if (cinfo->quantize_colors) {
-    /* The buffer strip height is max_v_samp_factor, which is typically
-     * an efficient number of rows for upsampling to return.
-     * (In the presence of output rescaling, we might want to be smarter?)
-     */
-    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
-    if (need_full_buffer) {
-      /* Two-pass color quantization: need full-image storage. */
-      /* We round up the number of rows to a multiple of the strip height. */
-#ifdef QUANT_2PASS_SUPPORTED
-      post->whole_image = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 (JDIMENSION) jround_up((long) cinfo->output_height,
-				(long) post->strip_height),
-	 post->strip_height);
-#else
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif /* QUANT_2PASS_SUPPORTED */
-    } else {
-      /* One-pass color quantization: just make a strip buffer. */
-      post->buffer = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 post->strip_height);
-    }
-  }
-}
diff --git a/dep/libjpeg/src/jdsample.c b/dep/libjpeg/src/jdsample.c
deleted file mode 100644
index 15afeafe3..000000000
--- a/dep/libjpeg/src/jdsample.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * jdsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains upsampling routines.
- *
- * Upsampling input data is counted in "row groups".  A row group
- * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
- * sample rows of each component.  Upsampling will normally produce
- * max_v_samp_factor pixel rows from each row group (but this could vary
- * if the upsampler is applying a scale factor of its own).
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to upsample a single component */
-typedef JMETHOD(void, upsample1_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Color conversion buffer.  When using separate upsampling and color
-   * conversion steps, this buffer holds one upsampled row group until it
-   * has been color converted and output.
-   * Note: we do not allocate any storage for component(s) which are full-size,
-   * ie do not need rescaling.  The corresponding entry of color_buf[] is
-   * simply set to point to the input data array, thereby avoiding copying.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  /* Per-component upsampling method pointers */
-  upsample1_ptr methods[MAX_COMPONENTS];
-
-  int next_row_out;		/* counts rows emitted from color_buf */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-
-  /* Height of an input row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_expand need not
-   * recompute them each time.  They are unused for other upsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the conversion buffer empty */
-  upsample->next_row_out = cinfo->max_v_samp_factor;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * In this version we upsample each component independently.
- * We upsample one row group into the conversion buffer, then apply
- * color conversion a row at a time.
- */
-
-METHODDEF(void)
-sep_upsample (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	      JDIMENSION in_row_groups_avail,
-	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	      JDIMENSION out_rows_avail)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JDIMENSION num_rows;
-
-  /* Fill the conversion buffer, if it's empty */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      /* Don't bother to upsample an uninteresting component. */
-      if (! compptr->component_needed)
-	continue;
-      /* Invoke per-component upsample method.  Notice we pass a POINTER
-       * to color_buf[ci], so that fullsize_upsample can change it.
-       */
-      (*upsample->methods[ci]) (cinfo, compptr,
-	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
-	upsample->color_buf + ci);
-    }
-    upsample->next_row_out = 0;
-  }
-
-  /* Color-convert and emit rows */
-
-  /* How many we have in the buffer: */
-  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
-  /* Not more than the distance to the end of the image.  Need this test
-   * in case the image height is not a multiple of max_v_samp_factor:
-   */
-  if (num_rows > upsample->rows_to_go) 
-    num_rows = upsample->rows_to_go;
-  /* And not more than what the client can accept: */
-  out_rows_avail -= *out_row_ctr;
-  if (num_rows > out_rows_avail)
-    num_rows = out_rows_avail;
-
-  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-				     (JDIMENSION) upsample->next_row_out,
-				     output_buf + *out_row_ctr,
-				     (int) num_rows);
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  upsample->next_row_out += num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
-    (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by sep_upsample to upsample pixel values
- * of a single component.  One row group is processed per call.
- */
-
-
-/*
- * For full-size components, we just make color_buf[ci] point at the
- * input buffer, and thus avoid copying any data.  Note that this is
- * safe only because sep_upsample doesn't declare the input row group
- * "consumed" until we are done color converting and emitting it.
- */
-
-METHODDEF(void)
-fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  *output_data_ptr = input_data;
-}
-
-
-/*
- * This version handles any integral sampling ratios.
- * This is not used for typical JPEG files, so it need not be fast.
- * Nor, for that matter, is it particularly accurate: the algorithm is
- * simple replication of the input pixel onto the corresponding output
- * pixels.  The hi-falutin sampling literature refers to this as a
- * "box filter".  A box filter tends to introduce visible artifacts,
- * so if you are actually going to use 3:1 or 4:1 sampling ratios
- * you would be well advised to improve this code.
- */
-
-METHODDEF(void)
-int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	      JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPARRAY output_data, output_end;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  register int h;
-  JSAMPROW outend;
-  int h_expand, v_expand;
-
-  h_expand = upsample->h_expand[compptr->component_index];
-  v_expand = upsample->v_expand[compptr->component_index];
-
-  output_data = *output_data_ptr;
-  output_end = output_data + cinfo->max_v_samp_factor;
-  for (; output_data < output_end; output_data += v_expand) {
-    /* Generate one output row with proper horizontal expansion */
-    inptr = *input_data++;
-    outptr = *output_data;
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      for (h = h_expand; h > 0; h--) {
-	*outptr++ = invalue;
-      }
-    }
-    /* Generate any additional output rows by duplicating the first one */
-    if (v_expand > 1) {
-      jcopy_sample_rows(output_data, output_data + 1,
-			v_expand - 1, cinfo->output_width);
-    }
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-  int outrow;
-
-  for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
-    inptr = input_data[outrow];
-    outptr = output_data[outrow];
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  JSAMPARRAY output_data, output_end;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-
-  output_data = *output_data_ptr;
-  output_end = output_data + cinfo->max_v_samp_factor;
-  for (; output_data < output_end; output_data += 2) {
-    inptr = *input_data++;
-    outptr = *output_data;
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-    jcopy_sample_rows(output_data, output_data + 1,
-		      1, cinfo->output_width);
-  }
-}
-
-
-/*
- * Module initialization routine for upsampling.
- */
-
-GLOBAL(void)
-jinit_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
-  cinfo->upsample = &upsample->pub;
-  upsample->pub.start_pass = start_pass_upsample;
-  upsample->pub.upsample = sep_upsample;
-  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
-
-  if (cinfo->CCIR601_sampling)	/* this isn't supported */
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, select per-component methods,
-   * and create storage as needed.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to upsample an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Compute size of an "input group" after IDCT scaling.  This many samples
-     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		 cinfo->min_DCT_h_scaled_size;
-    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		 cinfo->min_DCT_v_scaled_size;
-    h_out_group = cinfo->max_h_samp_factor;
-    v_out_group = cinfo->max_v_samp_factor;
-    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
-    if (h_in_group == h_out_group && v_in_group == v_out_group) {
-      /* Fullsize components can be processed without any work. */
-      upsample->methods[ci] = fullsize_upsample;
-      continue;		/* don't need to allocate buffer */
-    }
-    if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
-      /* Special case for 2h1v upsampling */
-      upsample->methods[ci] = h2v1_upsample;
-    } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group * 2 == v_out_group) {
-      /* Special case for 2h2v upsampling */
-      upsample->methods[ci] = h2v2_upsample;
-    } else if ((h_out_group % h_in_group) == 0 &&
-	       (v_out_group % v_in_group) == 0) {
-      /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
-      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
-      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-    upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) jround_up((long) cinfo->output_width,
-			      (long) cinfo->max_h_samp_factor),
-       (JDIMENSION) cinfo->max_v_samp_factor);
-  }
-}
diff --git a/dep/libjpeg/src/jdtrans.c b/dep/libjpeg/src/jdtrans.c
deleted file mode 100644
index 22dd47fb5..000000000
--- a/dep/libjpeg/src/jdtrans.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * jdtrans.c
- *
- * Copyright (C) 1995-1997, Thomas G. Lane.
- * Modified 2000-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding decompression,
- * that is, reading raw DCT coefficient arrays from an input JPEG file.
- * The routines in jdapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Read the coefficient arrays from a JPEG file.
- * jpeg_read_header must be completed before calling this.
- *
- * The entire image is read into a set of virtual coefficient-block arrays,
- * one per component.  The return value is a pointer to the array of
- * virtual-array descriptors.  These can be manipulated directly via the
- * JPEG memory manager, or handed off to jpeg_write_coefficients().
- * To release the memory occupied by the virtual arrays, call
- * jpeg_finish_decompress() when done with the data.
- *
- * An alternative usage is to simply obtain access to the coefficient arrays
- * during a buffered-image-mode decompression operation.  This is allowed
- * after any jpeg_finish_output() call.  The arrays can be accessed until
- * jpeg_finish_decompress() is called.  (Note that any call to the library
- * may reposition the arrays, so don't rely on access_virt_barray() results
- * to stay valid across library calls.)
- *
- * Returns NULL if suspended.  This case need be checked only if
- * a suspending data source is used.
- */
-
-GLOBAL(jvirt_barray_ptr *)
-jpeg_read_coefficients (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize active modules */
-    transdecode_master_selection(cinfo);
-    cinfo->global_state = DSTATE_RDCOEFS;
-  }
-  if (cinfo->global_state == DSTATE_RDCOEFS) {
-    /* Absorb whole file into the coef buffer */
-    for (;;) {
-      int retcode;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      /* Absorb some more input */
-      retcode = (*cinfo->inputctl->consume_input) (cinfo);
-      if (retcode == JPEG_SUSPENDED)
-	return NULL;
-      if (retcode == JPEG_REACHED_EOI)
-	break;
-      /* Advance progress counter if appropriate */
-      if (cinfo->progress != NULL &&
-	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
-      }
-    }
-    /* Set state so that jpeg_finish_decompress does the right thing */
-    cinfo->global_state = DSTATE_STOPPING;
-  }
-  /* At this point we should be in state DSTATE_STOPPING if being used
-   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
-   * to the coefficients during a full buffered-image-mode decompression.
-   */
-  if ((cinfo->global_state == DSTATE_STOPPING ||
-       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
-    return cinfo->coef->coef_arrays;
-  }
-  /* Oops, improper usage */
-  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return NULL;			/* keep compiler happy */
-}
-
-
-/*
- * Master selection of decompression modules for transcoding.
- * This substitutes for jdmaster.c's initialization of the full decompressor.
- */
-
-LOCAL(void)
-transdecode_master_selection (j_decompress_ptr cinfo)
-{
-  /* This is effectively a buffered-image operation. */
-  cinfo->buffered_image = TRUE;
-
-  /* Compute output image dimensions and related values. */
-  jpeg_core_output_dimensions(cinfo);
-
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, TRUE);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-  /* Initialize progress monitoring. */
-  if (cinfo->progress != NULL) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else if (cinfo->inputctl->has_multiple_scans) {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    } else {
-      nscans = 1;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = 1;
-  }
-}
diff --git a/dep/libjpeg/src/jerror.c b/dep/libjpeg/src/jerror.c
deleted file mode 100644
index 7163af699..000000000
--- a/dep/libjpeg/src/jerror.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * jerror.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2012-2015 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains simple error-reporting and trace-message routines.
- * These are suitable for Unix-like systems and others where writing to
- * stderr is the right thing to do.  Many applications will want to replace
- * some or all of these routines.
- *
- * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
- * you get a Windows-specific hack to display error messages in a dialog box.
- * It ain't much, but it beats dropping error messages into the bit bucket,
- * which is what happens to output to stderr under most Windows C compilers.
- *
- * These routines are used by both the compression and decompression code.
- */
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-#include <windows.h>
-#endif
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jversion.h"
-#include "jerror.h"
-
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
-#define EXIT_FAILURE  1
-#endif
-
-
-/*
- * Create the message string table.
- * We do this from the master message list in jerror.h by re-reading
- * jerror.h with a suitable definition for macro JMESSAGE.
- * The message table is made an external symbol just in case any applications
- * want to refer to it directly.
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_message_table	jMsgTable
-#endif
-
-#define JMESSAGE(code,string)	string ,
-
-const char * const jpeg_std_message_table[] = {
-#include "jerror.h"
-  NULL
-};
-
-
-/*
- * Error exit handler: must not return to caller.
- *
- * Applications may override this if they want to get control back after
- * an error.  Typically one would longjmp somewhere instead of exiting.
- * The setjmp buffer can be made a private field within an expanded error
- * handler object.  Note that the info needed to generate an error message
- * is stored in the error object, so you can generate the message now or
- * later, at your convenience.
- * You should make sure that the JPEG object is cleaned up (with jpeg_abort
- * or jpeg_destroy) at some point.
- */
-
-METHODDEF(noreturn_t)
-error_exit (j_common_ptr cinfo)
-{
-  /* Always display the message */
-  (*cinfo->err->output_message) (cinfo);
-
-  /* Let the memory manager delete any temp files before we die */
-  jpeg_destroy(cinfo);
-
-  exit(EXIT_FAILURE);
-}
-
-
-/*
- * Actual output of an error or trace message.
- * Applications may override this method to send JPEG messages somewhere
- * other than stderr.
- *
- * On Windows, printing to stderr is generally completely useless,
- * so we provide optional code to produce an error-dialog popup.
- * Most Windows applications will still prefer to override this routine,
- * but if they don't, it'll do something at least marginally useful.
- *
- * NOTE: to use the library in an environment that doesn't support the
- * C stdio library, you may have to delete the call to fprintf() entirely,
- * not just not use this routine.
- */
-
-METHODDEF(void)
-output_message (j_common_ptr cinfo)
-{
-  char buffer[JMSG_LENGTH_MAX];
-
-  /* Create the message */
-  (*cinfo->err->format_message) (cinfo, buffer);
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-  /* Display it in a message dialog box */
-  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
-	     MB_OK | MB_ICONERROR);
-#else
-  /* Send it to stderr, adding a newline */
-  fprintf(stderr, "%s\n", buffer);
-#endif
-}
-
-
-/*
- * Decide whether to emit a trace or warning message.
- * msg_level is one of:
- *   -1: recoverable corrupt-data warning, may want to abort.
- *    0: important advisory messages (always display to user).
- *    1: first level of tracing detail.
- *    2,3,...: successively more detailed tracing messages.
- * An application might override this method if it wanted to abort on warnings
- * or change the policy about which messages to display.
- */
-
-METHODDEF(void)
-emit_message (j_common_ptr cinfo, int msg_level)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-
-  if (msg_level < 0) {
-    /* It's a warning message.  Since corrupt files may generate many warnings,
-     * the policy implemented here is to show only the first warning,
-     * unless trace_level >= 3.
-     */
-    if (err->num_warnings == 0 || err->trace_level >= 3)
-      (*err->output_message) (cinfo);
-    /* Always count warnings in num_warnings. */
-    err->num_warnings++;
-  } else {
-    /* It's a trace message.  Show it if trace_level >= msg_level. */
-    if (err->trace_level >= msg_level)
-      (*err->output_message) (cinfo);
-  }
-}
-
-
-/*
- * Format a message string for the most recent JPEG error or message.
- * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
- * characters.  Note that no '\n' character is added to the string.
- * Few applications should need to override this method.
- */
-
-METHODDEF(void)
-format_message (j_common_ptr cinfo, char * buffer)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-  int msg_code = err->msg_code;
-  const char * msgtext = NULL;
-  const char * msgptr;
-  char ch;
-  boolean isstring;
-
-  /* Look up message string in proper table */
-  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
-    msgtext = err->jpeg_message_table[msg_code];
-  } else if (err->addon_message_table != NULL &&
-	     msg_code >= err->first_addon_message &&
-	     msg_code <= err->last_addon_message) {
-    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
-  }
-
-  /* Defend against bogus message number */
-  if (msgtext == NULL) {
-    err->msg_parm.i[0] = msg_code;
-    msgtext = err->jpeg_message_table[0];
-  }
-
-  /* Check for string parameter, as indicated by %s in the message text */
-  isstring = FALSE;
-  msgptr = msgtext;
-  while ((ch = *msgptr++) != '\0') {
-    if (ch == '%') {
-      if (*msgptr == 's') isstring = TRUE;
-      break;
-    }
-  }
-
-  /* Format the message into the passed buffer */
-  if (isstring)
-    sprintf(buffer, msgtext, err->msg_parm.s);
-  else
-    sprintf(buffer, msgtext,
-	    err->msg_parm.i[0], err->msg_parm.i[1],
-	    err->msg_parm.i[2], err->msg_parm.i[3],
-	    err->msg_parm.i[4], err->msg_parm.i[5],
-	    err->msg_parm.i[6], err->msg_parm.i[7]);
-}
-
-
-/*
- * Reset error state variables at start of a new image.
- * This is called during compression startup to reset trace/error
- * processing to default state, without losing any application-specific
- * method pointers.  An application might possibly want to override
- * this method if it has additional error processing state.
- */
-
-METHODDEF(void)
-reset_error_mgr (j_common_ptr cinfo)
-{
-  cinfo->err->num_warnings = 0;
-  /* trace_level is not reset since it is an application-supplied parameter */
-  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
-}
-
-
-/*
- * Fill in the standard error-handling methods in a jpeg_error_mgr object.
- * Typical call is:
- *	struct jpeg_compress_struct cinfo;
- *	struct jpeg_error_mgr err;
- *
- *	cinfo.err = jpeg_std_error(&err);
- * after which the application may override some of the methods.
- */
-
-GLOBAL(struct jpeg_error_mgr *)
-jpeg_std_error (struct jpeg_error_mgr * err)
-{
-  err->error_exit = error_exit;
-  err->emit_message = emit_message;
-  err->output_message = output_message;
-  err->format_message = format_message;
-  err->reset_error_mgr = reset_error_mgr;
-
-  err->trace_level = 0;		/* default = no tracing */
-  err->num_warnings = 0;	/* no warnings emitted yet */
-  err->msg_code = 0;		/* may be useful as a flag for "no error" */
-
-  /* Initialize message table pointers */
-  err->jpeg_message_table = jpeg_std_message_table;
-  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
-
-  err->addon_message_table = NULL;
-  err->first_addon_message = 0;	/* for safety */
-  err->last_addon_message = 0;
-
-  return err;
-}
diff --git a/dep/libjpeg/src/jfdctflt.c b/dep/libjpeg/src/jfdctflt.c
deleted file mode 100644
index 013f29e0c..000000000
--- a/dep/libjpeg/src/jfdctflt.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * jfdctflt.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * This implementation should be more accurate than either of the integer
- * DCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
-  FAST_FLOAT *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
-    tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
-    tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
-    tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
-    tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
-    tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
-    tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
-    tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/dep/libjpeg/src/jfdctfst.c b/dep/libjpeg/src/jfdctfst.c
deleted file mode 100644
index 5e4e01722..000000000
--- a/dep/libjpeg/src/jfdctfst.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * jfdctfst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jfdctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * Again to save a few shifts, the intermediate results between pass 1 and
- * pass 2 are not upscaled, but are represented only to integral precision.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#define CONST_BITS  8
-
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
-#else
-#define FIX_0_382683433  FIX(0.382683433)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_707106781  FIX(0.707106781)
-#define FIX_1_306562965  FIX(1.306562965)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/*
- * Perform the forward DCT on one block of samples.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z1, z2, z3, z4, z5, z11, z13;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-    tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
diff --git a/dep/libjpeg/src/jfdctint.c b/dep/libjpeg/src/jfdctint.c
deleted file mode 100644
index 05df4750b..000000000
--- a/dep/libjpeg/src/jfdctint.c
+++ /dev/null
@@ -1,4415 +0,0 @@
-/*
- * jfdctint.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide FDCT routines with various input sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
- *
- * For N<8 we fill the remaining block coefficients with zero.
- * For N>8 we apply a partial N-point FDCT on the input samples, computing
- * just the lower 8 frequency coefficients and discarding the rest.
- *
- * We must scale the output coefficients of the N-point FDCT appropriately
- * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
- * is folded into the constant multipliers (pass 2) and/or final/initial
- * shifting.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- * larger than the true DCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D DCT,
- * because the y0 and y4 outputs need not be divided by sqrt(N).
- * In the IJG code, this factor of 8 is removed by the quantization step
- * (in jcdctmgr.c), NOT in this module.
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (For 12-bit sample data, the intermediate
- * array is INT32 anyway.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    dataptr[2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#ifdef DCT_SCALING_SUPPORTED
-
-
-/*
- * Perform the forward DCT on a 7x7 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)**2 = 64/49, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x6 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)**2 = 16/9, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x5 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)**2 = 64/25, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x4 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-2);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x3 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2**2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/3)**2 = 64/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x2 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   */
-
-  /* Row 0 */
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Row 1 */
-  elemptr = sample_data[1] + start_col;
-
-  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)**2 = 2**4.
-   */
-
-  /* Column 0 */
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
-  data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
-
-  /* Column 1 */
-  data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
-  data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
-}
-
-
-/*
- * Perform the forward DCT on a 1x1 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM dcval;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  dcval = GETJSAMPLE(sample_data[0][start_col]);
-
-  /* We leave the result scaled up by an overall factor of 8. */
-  /* We must also scale the output by (8/1)**2 = 2**6. */
-  /* Apply unsigned->signed conversion. */
-  data[0] = (dcval - CENTERJSAMPLE) << 6;
-}
-
-
-/*
- * Perform the forward DCT on a 9x9 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2;
-  DCTELEM workspace[8];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/18).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
-    tmp4 = GETJSAMPLE(elemptr[4]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
-	      CONST_BITS-1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
-	      + z1 + z2, CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
-	      + z1 - z2, CONST_BITS-1);
-
-    /* Odd part */
-
-    dataptr[3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
-	      CONST_BITS-1);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
-
-    dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 9)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/9)**2 = 64/81, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
-    tmp4 = dataptr[DCTSIZE*4];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
-	      CONST_BITS+2);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
-	      + z1 + z2, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
-	      + z1 - z2, CONST_BITS+2);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
-	      CONST_BITS+2);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
-
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x10 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-1);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-1);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/10)**2 = 16/25, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+2);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+2);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+2);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+2);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 11x11 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*3];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/22).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
-    tmp5 = GETJSAMPLE(elemptr[5]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
-	      CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 11)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/11)**2 = 64/121, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
-    tmp5 = dataptr[DCTSIZE*5];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
-    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
-		       FIX(1.057851240)),                /* 128/121 */
-	      CONST_BITS+2);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x12 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
-    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/12)**2 = 4/9, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 13x13 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 z1, z2;
-  DCTELEM workspace[8*5];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/26).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
-    tmp6 = GETJSAMPLE(elemptr[6]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
-	      CONST_BITS);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
-
-    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 13)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/13)**2 = 64/169, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
-    tmp6 = dataptr[DCTSIZE*6];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
-    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
-		       FIX(0.757396450)),          /* 128/169 */
-	      CONST_BITS+1);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
-	      CONST_BITS+1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
-
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x14 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/14)**2 = 16/49, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+1);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+1);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 15x15 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*7];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/30).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
-    tmp7 = GETJSAMPLE(elemptr[7]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
-    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
-    z3 += z3;
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
-	      CONST_BITS);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
-
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.224744871));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 15)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/15)**2 = 64/225, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
-    tmp7 = dataptr[DCTSIZE*7];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
-    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
-    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
-	      CONST_BITS+2);
-    z3 += z3;
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
-	      CONST_BITS+2);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.393487498));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x16 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/16)**2 = 1/2**2.
-   * cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+2);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
-	      CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+2);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x8 sample block.
- *
- * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x7 sample block.
- *
- * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero bottom row of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS+1);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x6 sample block.
- *
- * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 2 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x5 sample block.
- *
- * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 3 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/10)*(8/5) = 32/25, which we
-   * fold into the constant multipliers:
-   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 8x4 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 4 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by 8/4 = 2, which we add here.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    dataptr[2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-1);
-    dataptr[6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
-    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
-    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x3 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS-1);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x2 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
-  INT32 tmp1, tmp3, tmp11, tmp13;
-  INT32 z1, z2, z3;
-  JSAMPROW elemptr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  /* Row 0 */
-  elemptr = sample_data[0] + start_col;
-
-  /* Even part */
-
-  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-  tmp0 = tmp4 + tmp5;
-  tmp2 = tmp4 - tmp5;
-
-  /* Odd part */
-
-  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
-  /* Add fudge factor here for final descale. */
-  z1 += ONE << (CONST_BITS-3-1);
-  tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-  /* Row 1 */
-  elemptr = sample_data[1] + start_col;
-
-  /* Even part */
-
-  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-  tmp10 = tmp4 + tmp5;
-  tmp12 = tmp4 - tmp5;
-
-  /* Odd part */
-
-  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
-  tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-  tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/4)*(8/2) = 2**3.
-   */
-
-  /* Column 0 */
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp10 - 8 * CENTERJSAMPLE) << 3;
-  data[DCTSIZE*1] = (tmp0 - tmp10) << 3;
-
-  /* Column 1 */
-  data[DCTSIZE*0+1] = (DCTELEM) RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS-3);
-  data[DCTSIZE*1+1] = (DCTELEM) RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS-3);
-
-  /* Column 2 */
-  data[DCTSIZE*0+2] = (tmp2 + tmp12) << 3;
-  data[DCTSIZE*1+2] = (tmp2 - tmp12) << 3;
-
-  /* Column 3 */
-  data[DCTSIZE*0+3] = (DCTELEM) RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS-3);
-  data[DCTSIZE*1+3] = (DCTELEM) RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS-3);
-}
-
-
-/*
- * Perform the forward DCT on a 2x1 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]);
-  tmp1 = GETJSAMPLE(elemptr[1]);
-
-  /* We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)*(8/1) = 2**5.
-   */
-
-  /* Even part */
-
-  /* Apply unsigned->signed conversion. */
-  data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
-
-  /* Odd part */
-
-  data[1] = (tmp0 - tmp1) << 5;
-}
-
-
-/*
- * Perform the forward DCT on an 8x16 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+1);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 7x14 sample block.
- *
- * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)*(8/14) = 32/49, which we
-   * fold into the constant multipliers:
-   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x12 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/12) = 8/9, which we
-   * fold into the constant multipliers:
-   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x10 sample block.
- *
- * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)*(8/10) = 32/25, which we
-   * fold into the constant multipliers:
-   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x8 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by 8/4 = 2, which we add here.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x6 sample block.
- *
- * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x4 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM) (tmp0 - tmp1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)*(8/4) = 2**3.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 2; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1) << 3);
-    dataptr[DCTSIZE*2] = (DCTELEM) ((tmp0 - tmp1) << 3);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-3-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-3);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-3);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 1x2 sample block.
- *
- * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: empty. */
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/1)*(8/2) = 2**5.
-   */
-
-  /* Even part */
-
-  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
-  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
-
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
-
-  /* Odd part */
-
-  data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
-}
-
-#endif /* DCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/dep/libjpeg/src/jidctflt.c b/dep/libjpeg/src/jidctflt.c
deleted file mode 100644
index e33a2b5e4..000000000
--- a/dep/libjpeg/src/jidctflt.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * jidctflt.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2010-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * This implementation should be more accurate than either of the integer
- * IDCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a float result.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  FLOAT_MULT_TYPE * quantptr;
-  FAST_FLOAT * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    wsptr[DCTSIZE*0] = tmp0 + tmp7;
-    wsptr[DCTSIZE*7] = tmp0 - tmp7;
-    wsptr[DCTSIZE*1] = tmp1 + tmp6;
-    wsptr[DCTSIZE*6] = tmp1 - tmp6;
-    wsptr[DCTSIZE*2] = tmp2 + tmp5;
-    wsptr[DCTSIZE*5] = tmp2 - tmp5;
-    wsptr[DCTSIZE*3] = tmp3 + tmp4;
-    wsptr[DCTSIZE*4] = tmp3 - tmp4;
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * And testing floats for zero is relatively expensive, so we don't bother.
-     */
-
-    /* Even part */
-
-    /* Prepare range-limit and float->int conversion */
-    z5 = wsptr[0] + (((FAST_FLOAT) RANGE_CENTER) + ((FAST_FLOAT) 0.5));
-    tmp10 = z5 + wsptr[4];
-    tmp11 = z5 - wsptr[4];
-
-    tmp13 = wsptr[2] + wsptr[6];
-    tmp12 = (wsptr[2] - wsptr[6]) *
-	      ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = wsptr[5] + wsptr[3];
-    z10 = wsptr[5] - wsptr[3];
-    z11 = wsptr[1] + wsptr[7];
-    z12 = wsptr[1] - wsptr[7];
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    /* Final output stage: float->int conversion and range-limit */
-
-    outptr[0] = range_limit[(int) (tmp0 + tmp7) & RANGE_MASK];
-    outptr[7] = range_limit[(int) (tmp0 - tmp7) & RANGE_MASK];
-    outptr[1] = range_limit[(int) (tmp1 + tmp6) & RANGE_MASK];
-    outptr[6] = range_limit[(int) (tmp1 - tmp6) & RANGE_MASK];
-    outptr[2] = range_limit[(int) (tmp2 + tmp5) & RANGE_MASK];
-    outptr[5] = range_limit[(int) (tmp2 - tmp5) & RANGE_MASK];
-    outptr[3] = range_limit[(int) (tmp3 + tmp4) & RANGE_MASK];
-    outptr[4] = range_limit[(int) (tmp3 - tmp4) & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/dep/libjpeg/src/jidctfst.c b/dep/libjpeg/src/jidctfst.c
deleted file mode 100644
index 1ac3e39cb..000000000
--- a/dep/libjpeg/src/jidctfst.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * jidctfst.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2015-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jidctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * The dequantized coefficients are not integers because the AA&N scaling
- * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
- * so that the first and second IDCT rounds have the same input scaling.
- * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
- * avoid a descaling shift; this compromises accuracy rather drastically
- * for small quantization table entries, but it saves a lot of shifts.
- * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
- * so we use a much larger scaling factor to preserve accuracy.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  8
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  8
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
-#else
-#define FIX_1_082392200  FIX(1.082392200)
-#define FIX_1_414213562  FIX(1.414213562)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_2_613125930  FIX(2.613125930)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
- * multiplication will do.  For 12-bit data, the multiplier table is
- * declared INT32, so a 32-bit multiply will be used.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
-#else
-#define DEQUANTIZE(coef,quantval)  \
-	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
-#endif
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  IFAST_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS			/* for DESCALE */
-  ISHIFT_TEMPS			/* for IRIGHT_SHIFT */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
-    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-    wsptr[DCTSIZE*3] = (int) (tmp3 + tmp4);
-    wsptr[DCTSIZE*4] = (int) (tmp3 - tmp4);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-  
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z5 = (DCTELEM) wsptr[0] +
-	   ((((DCTELEM) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (1 << (PASS1_BITS+2)));
-
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-    
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) IRIGHT_SHIFT(z5, PASS1_BITS+3)
-				  & RANGE_MASK];
-      
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-    
-    /* Even part */
-
-    tmp10 = z5 + (DCTELEM) wsptr[4];
-    tmp11 = z5 - (DCTELEM) wsptr[4];
-
-    tmp13 = (DCTELEM) wsptr[2] + (DCTELEM) wsptr[6];
-    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6],
-		     FIX_1_414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-    outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp1 + tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) IRIGHT_SHIFT(tmp1 - tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) IRIGHT_SHIFT(tmp3 + tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) IRIGHT_SHIFT(tmp3 - tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
diff --git a/dep/libjpeg/src/jidctint.c b/dep/libjpeg/src/jidctint.c
deleted file mode 100644
index e30ec8c8a..000000000
--- a/dep/libjpeg/src/jidctint.c
+++ /dev/null
@@ -1,5240 +0,0 @@
-/*
- * jidctint.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide IDCT routines with various output sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
- *
- * For N<8 we simply take the corresponding low-frequency coefficients of
- * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
- * to yield the downscaled outputs.
- * This can be seen as direct low-pass downsampling from the DCT domain
- * point of view rather than the usual spatial domain point of view,
- * yielding significant computational savings and results at least
- * as good as common bilinear (averaging) spatial downsampling.
- *
- * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
- * lower frequencies and higher frequencies assumed to be zero.
- * It turns out that the computational effort is similar to the 8x8 IDCT
- * regarding the output size.
- * Furthermore, the scaling and descaling is the same for all IDCT sizes.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
- * larger than the true IDCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D IDCT,
- * because the y0 and y4 inputs need not be divided by sqrt(N).
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (To scale up 12-bit sample data further, an
- * intermediate INT32 array would be needed.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce an int result.  In this module, both inputs and result
- * are 16 bits or less, so either int or short multiply will work.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
-				  & RANGE_MASK];
-
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 7x7 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/14).
- */
-
-GLOBAL(void)
-jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp13 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp13 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp13 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 6x6 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/12).
- */
-
-GLOBAL(void)
-jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) (tmp11 + tmp1);
-    wsptr[6*4] = (int) (tmp11 - tmp1);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 5x5 output block.
- *
- * Optimized algorithm with 5 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/10).
- */
-
-GLOBAL(void)
-jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp12 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp12 <<= CONST_BITS;
-    tmp0 = (INT32) wsptr[2];
-    tmp1 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 4x4 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
- */
-
-GLOBAL(void)
-jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[4*0] = (int) (tmp10 + tmp0);
-    wsptr[4*3] = (int) (tmp10 - tmp0);
-    wsptr[4*1] = (int) (tmp12 + tmp2);
-    wsptr[4*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 3x3 output block.
- *
- * Optimized algorithm with 2 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/6).
- */
-
-GLOBAL(void)
-jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 3 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 2x2 output block.
- *
- * Multiplication-less algorithm.
- */
-
-GLOBAL(void)
-jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Pass 1: process columns from input. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Column 0 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  tmp0 = tmp4 + tmp5;
-  tmp2 = tmp4 - tmp5;
-
-  /* Column 1 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
-
-  tmp1 = tmp4 + tmp5;
-  tmp3 = tmp4 - tmp5;
-
-  /* Pass 2: process 2 rows, store into output array. */
-
-  /* Row 0 */
-  outptr = output_buf[0] + output_col;
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-
-  /* Row 1 */
-  outptr = output_buf[1] + output_col;
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 1x1 output block.
- *
- * We hardly need an inverse DCT routine for this: just take the
- * average pixel value, which is one-eighth of the DC coefficient.
- */
-
-GLOBAL(void)
-jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM dcval;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add range center and fudge factor for descale and range-limit. */
-  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  output_buf[0][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 9x9 output block.
- *
- * Optimized algorithm with 10 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/18).
- */
-
-GLOBAL(void)
-jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*9];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 9 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 9; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x10 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/20).
- */
-
-GLOBAL(void)
-jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) (tmp22 + tmp12);
-    wsptr[8*7] = (int) (tmp22 - tmp12);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 11x11 output block.
- *
- * Optimized algorithm with 24 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/22).
- */
-
-GLOBAL(void)
-jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*11];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 11 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 11; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp10 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x12 output block.
- *
- * Optimized algorithm with 15 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/24).
- */
-
-GLOBAL(void)
-jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 13x13 output block.
- *
- * Optimized algorithm with 29 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/26).
- */
-
-GLOBAL(void)
-jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*13];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 13 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 13; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x14 output block.
- *
- * Optimized algorithm with 20 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/28).
- */
-
-GLOBAL(void)
-jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) (tmp23 + tmp13);
-    wsptr[8*10] = (int) (tmp23 - tmp13);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 15x15 output block.
- *
- * Optimized algorithm with 22 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/30).
- */
-
-GLOBAL(void)
-jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*15];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 15 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 15; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z4 = (INT32) wsptr[5];
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = (INT32) wsptr[7];
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x16 output block.
- *
- * Optimized algorithm with 28 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/32).
- */
-
-GLOBAL(void)
-jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 16 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x7 output block.
- *
- * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp23 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
-    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) (tmp21 + tmp11);
-    wsptr[8*4] = (int) (tmp21 - tmp11);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x5 output block.
- *
- * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 8x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) (tmp10 + tmp0);
-    wsptr[8*3] = (int) (tmp10 - tmp0);
-    wsptr[8*1] = (int) (tmp12 + tmp2);
-    wsptr[8*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 6x3 output block.
- *
- * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-  
-  /* Pass 2: process 3 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[4*2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-    /* Odd part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    /* Final output stage */
-
-    wsptr[4*0] = tmp10 + tmp0;
-    wsptr[4*1] = tmp10 - tmp0;
-  }
-
-  /* Pass 2: process 2 rows from work array, store into output array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
-    tmp2 = wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = wsptr[1];
-    z3 = wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x1 output block.
- *
- * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Pass 1: empty. */
-
-  /* Pass 2: process 1 row from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  outptr = output_buf[0] + output_col;
-
-  /* Even part */
-
-  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  /* Odd part */
-
-  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
-
-  /* Final output stage */
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 8x16 output block.
- *
- * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x14 output block.
- *
- * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*3]  = (int) (tmp23 + tmp13);
-    wsptr[7*10] = (int) (tmp23 - tmp13);
-    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp23 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp23 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 6x12 output block.
- *
- * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp10 <<= CONST_BITS;
-    tmp12 = (INT32) wsptr[4];
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = tmp10 - tmp20 - tmp20;
-    tmp20 = (INT32) wsptr[2];
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 5x10 output block.
- *
- * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) (tmp22 + tmp12);
-    wsptr[5*7] = (int) (tmp22 - tmp12);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp12 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp12 <<= CONST_BITS;
-    tmp13 = (INT32) wsptr[2];
-    tmp14 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 4; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[4*0] = dcval;
-      wsptr[4*1] = dcval;
-      wsptr[4*2] = dcval;
-      wsptr[4*3] = dcval;
-      wsptr[4*4] = dcval;
-      wsptr[4*5] = dcval;
-      wsptr[4*6] = dcval;
-      wsptr[4*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 3x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) (tmp11 + tmp1);
-    wsptr[3*4] = (int) (tmp11 - tmp1);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[2*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    wsptr[2*0] = tmp10 + tmp0;
-    wsptr[2*3] = tmp10 - tmp0;
-    wsptr[2*1] = tmp12 + tmp2;
-    wsptr[2*2] = tmp12 - tmp2;
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
-	       (ONE << (CONST_BITS+2)));
-
-    /* Odd part */
-
-    tmp0 = wsptr[1];
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 2;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 1x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Process 1 column from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Even part */
-
-  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  /* Odd part */
-
-  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-  /* Final output stage */
-
-  output_buf[0][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  output_buf[1][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-}
-
-#endif /* IDCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/dep/libjpeg/src/jinclude.h b/dep/libjpeg/src/jinclude.h
deleted file mode 100644
index 12ea8cd2f..000000000
--- a/dep/libjpeg/src/jinclude.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * jinclude.h
- *
- * Copyright (C) 1991-1994, Thomas G. Lane.
- * Modified 2017-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file exists to provide a single place to fix any problems with
- * including the wrong system include files.  (Common problems are taken
- * care of by the standard jconfig symbols, but on really weird systems
- * you may have to edit this file.)
- *
- * NOTE: this file is NOT intended to be included by applications using
- * the JPEG library.  Most applications need only include jpeglib.h.
- */
-
-
-/* Include auto-config file to find out which system include files we need. */
-
-#include "jconfig.h"		/* auto configuration options */
-#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
-
-/*
- * We need the NULL macro and size_t typedef.
- * On an ANSI-conforming system it is sufficient to include <stddef.h>.
- * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
- * pull in <sys/types.h> as well.
- * Note that the core JPEG library does not require <stdio.h>;
- * only the default error handler and data source/destination modules do.
- * But we must pull it in because of the references to FILE in jpeglib.h.
- * You can remove those references if you want to compile without <stdio.h>.
- */
-
-#ifdef HAVE_STDDEF_H
-#include <stddef.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#include <stdio.h>
-
-/*
- * We need memory copying and zeroing functions, plus strncpy().
- * ANSI and System V implementations declare these in <string.h>.
- * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
- * Some systems may declare memset and memcpy in <memory.h>.
- *
- * NOTE: we assume the size parameters to these functions are of type size_t.
- * Change the casts in these macros if not!
- */
-
-#ifdef NEED_BSD_STRINGS
-
-#include <strings.h>
-#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
-
-#else /* not BSD, assume ANSI/SysV string lib */
-
-#include <string.h>
-#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
-
-#endif
-
-/*
- * In ANSI C, and indeed any rational implementation, size_t is also the
- * type returned by sizeof().  However, it seems there are some irrational
- * implementations out there, in which sizeof() returns an int even though
- * size_t is defined as long or unsigned long.  To ensure consistent results
- * we always use this SIZEOF() macro in place of using sizeof() directly.
- */
-
-#define SIZEOF(object)	((size_t) sizeof(object))
-
-/*
- * The modules that use fread() and fwrite() always invoke them through
- * these macros.  On some systems you may need to twiddle the argument casts.
- * CAUTION: argument order is different from underlying functions!
- *
- * Furthermore, macros are provided for fflush() and ferror() in order
- * to facilitate adaption by applications using an own FILE class.
- *
- * You can define your own custom file I/O functions in jconfig.h and
- * #define JPEG_HAVE_FILE_IO_CUSTOM there to prevent redefinition here.
- *
- * You can #define JPEG_USE_FILE_IO_CUSTOM in jconfig.h to use custom file
- * I/O functions implemented in Delphi VCL (Visual Component Library)
- * in Vcl.Imaging.jpeg.pas for the TJPEGImage component utilizing
- * the Delphi RTL (Run-Time Library) TMemoryStream component:
- *
- *   procedure jpeg_stdio_src(var cinfo: jpeg_decompress_struct;
- *     input_file: TStream); external;
- *
- *   procedure jpeg_stdio_dest(var cinfo: jpeg_compress_struct;
- *     output_file: TStream); external;
- *
- *   function jfread(var buf; recsize, reccount: Integer; S: TStream): Integer;
- *   begin
- *     Result := S.Read(buf, recsize * reccount);
- *   end;
- *
- *   function jfwrite(const buf; recsize, reccount: Integer; S: TStream): Integer;
- *   begin
- *     Result := S.Write(buf, recsize * reccount);
- *   end;
- *
- *   function jfflush(S: TStream): Integer;
- *   begin
- *     Result := 0;
- *   end;
- *
- *   function jferror(S: TStream): Integer;
- *   begin
- *     Result := 0;
- *   end;
- *
- * TMemoryStream of Delphi RTL has the distinctive feature to provide dynamic
- * memory buffer management with a file/stream-based interface, particularly for
- * the write (output) operation, which is easier to apply compared with direct
- * implementations as given in jdatadst.c for memory destination.  Those direct
- * implementations of dynamic memory write tend to be more difficult to use,
- * so providing an option like TMemoryStream may be a useful alternative.
- *
- * The CFile/CMemFile classes of the Microsoft Foundation Class (MFC) Library
- * may be used in a similar fashion.
- */
-
-#ifndef JPEG_HAVE_FILE_IO_CUSTOM
-#ifdef JPEG_USE_FILE_IO_CUSTOM
-extern size_t jfread(void * __ptr, size_t __size, size_t __n, FILE * __stream);
-extern size_t jfwrite(const void * __ptr, size_t __size, size_t __n, FILE * __stream);
-extern int    jfflush(FILE * __stream);
-extern int    jferror(FILE * __fp);
-
-#define JFREAD(file,buf,sizeofbuf)  \
-  ((size_t) jfread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFWRITE(file,buf,sizeofbuf)  \
-  ((size_t) jfwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFFLUSH(file)	jfflush(file)
-#define JFERROR(file)	jferror(file)
-#else
-#define JFREAD(file,buf,sizeofbuf)  \
-  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFWRITE(file,buf,sizeofbuf)  \
-  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFFLUSH(file)	fflush(file)
-#define JFERROR(file)	ferror(file)
-#endif
-#endif
diff --git a/dep/libjpeg/src/jmemmgr.c b/dep/libjpeg/src/jmemmgr.c
deleted file mode 100644
index 40377de47..000000000
--- a/dep/libjpeg/src/jmemmgr.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- * jmemmgr.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the JPEG system-independent memory management
- * routines.  This code is usable across a wide variety of machines; most
- * of the system dependencies have been isolated in a separate file.
- * The major functions provided here are:
- *   * pool-based allocation and freeing of memory;
- *   * policy decisions about how to divide available memory among the
- *     virtual arrays;
- *   * control logic for swapping virtual arrays between main memory and
- *     backing storage.
- * The separate system-dependent file provides the actual backing-storage
- * access code, and it contains the policy decision about how much total
- * main memory to use.
- * This file is system-dependent in the sense that some of its functions
- * are unnecessary in some systems.  For example, if there is enough virtual
- * memory so that backing storage will never be used, much of the virtual
- * array control logic could be removed.  (Of course, if you have that much
- * memory then you shouldn't care about a little bit of unused code...)
- */
-
-#define JPEG_INTERNALS
-#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
-extern char * getenv JPP((const char * name));
-#endif
-#endif
-
-
-/*
- * Some important notes:
- *   The allocation routines provided here must never return NULL.
- *   They should exit to error_exit if unsuccessful.
- *
- *   It's not a good idea to try to merge the sarray and barray routines,
- *   even though they are textually almost the same, because samples are
- *   usually stored as bytes while coefficients are shorts or ints.  Thus,
- *   in machines where byte pointers have a different representation from
- *   word pointers, the resulting machine code could not be the same.
- */
-
-
-/*
- * Many machines require storage alignment: longs must start on 4-byte
- * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
- * always returns pointers that are multiples of the worst-case alignment
- * requirement, and we had better do so too.
- * There isn't any really portable way to determine the worst-case alignment
- * requirement.  This module assumes that the alignment requirement is
- * multiples of sizeof(ALIGN_TYPE).
- * By default, we define ALIGN_TYPE as double.  This is necessary on some
- * workstations (where doubles really do need 8-byte alignment) and will work
- * fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_TYPE smaller.
- * The only place I know of where this will NOT work is certain Macintosh
- * 680x0 compilers that define double as a 10-byte IEEE extended float.
- * Doing 10-byte alignment is counterproductive because longwords won't be
- * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
- * such a compiler.
- */
-
-#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
-#define ALIGN_TYPE  double
-#endif
-
-
-/*
- * We allocate objects from "pools", where each pool is gotten with a single
- * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
- * overhead within a pool, except for alignment padding.  Each pool has a
- * header with a link to the next pool of the same class.
- * Small and large pool headers are identical except that the latter's
- * link pointer must be FAR on 80x86 machines.
- * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
- * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
- * of the alignment requirement of ALIGN_TYPE.
- */
-
-typedef union small_pool_struct * small_pool_ptr;
-
-typedef union small_pool_struct {
-  struct {
-    small_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} small_pool_hdr;
-
-typedef union large_pool_struct FAR * large_pool_ptr;
-
-typedef union large_pool_struct {
-  struct {
-    large_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} large_pool_hdr;
-
-
-/*
- * Here is the full definition of a memory manager object.
- */
-
-typedef struct {
-  struct jpeg_memory_mgr pub;	/* public fields */
-
-  /* Each pool identifier (lifetime class) names a linked list of pools. */
-  small_pool_ptr small_list[JPOOL_NUMPOOLS];
-  large_pool_ptr large_list[JPOOL_NUMPOOLS];
-
-  /* Since we only have one lifetime class of virtual arrays, only one
-   * linked list is necessary (for each datatype).  Note that the virtual
-   * array control blocks being linked together are actually stored somewhere
-   * in the small-pool list.
-   */
-  jvirt_sarray_ptr virt_sarray_list;
-  jvirt_barray_ptr virt_barray_list;
-
-  /* This counts total space obtained from jpeg_get_small/large */
-  size_t total_space_allocated;
-
-  /* alloc_sarray and alloc_barray set this value for use by virtual
-   * array routines.
-   */
-  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
-} my_memory_mgr;
-
-typedef my_memory_mgr * my_mem_ptr;
-
-
-/*
- * The control blocks for virtual arrays.
- * Note that these blocks are allocated in the "small" pool area.
- * System-dependent info for the associated backing store (if any) is hidden
- * inside the backing_store_info struct.
- */
-
-struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-struct jvirt_barray_control {
-  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_barray_ptr next;	/* link to next virtual barray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-
-#ifdef MEM_STATS		/* optional extra stuff for statistics */
-
-LOCAL(void)
-print_mem_stats (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-
-  /* Since this is only a debugging stub, we can cheat a little by using
-   * fprintf directly rather than going through the trace message code.
-   * This is helpful because message parm array can't handle longs.
-   */
-  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
-	  pool_id, (long) mem->total_space_allocated);
-
-  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
-       lhdr_ptr = lhdr_ptr->hdr.next) {
-    fprintf(stderr, "  Large chunk used %ld\n",
-	    (long) lhdr_ptr->hdr.bytes_used);
-  }
-
-  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
-       shdr_ptr = shdr_ptr->hdr.next) {
-    fprintf(stderr, "  Small chunk used %ld free %ld\n",
-	    (long) shdr_ptr->hdr.bytes_used,
-	    (long) shdr_ptr->hdr.bytes_left);
-  }
-}
-
-#endif /* MEM_STATS */
-
-
-LOCAL(noreturn_t)
-out_of_memory (j_common_ptr cinfo, int which)
-/* Report an out-of-memory error and stop execution */
-/* If we compiled MEM_STATS support, report alloc requests before dying */
-{
-#ifdef MEM_STATS
-  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
-#endif
-  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
-}
-
-
-/*
- * Allocation of "small" objects.
- *
- * For these, we use pooled storage.  When a new pool must be created,
- * we try to get enough space for the current request plus a "slop" factor,
- * where the slop will be the amount of leftover space in the new pool.
- * The speed vs. space tradeoff is largely determined by the slop values.
- * A different slop value is provided for each pool class (lifetime),
- * and we also distinguish the first pool of a class from later ones.
- * NOTE: the values given work fairly well on both 16- and 32-bit-int
- * machines, but may be too small if longs are 64 bits or more.
- */
-
-static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	1600,			/* first PERMANENT pool */
-	16000			/* first IMAGE pool */
-};
-
-static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	0,			/* additional PERMANENT pools */
-	5000			/* additional IMAGE pools */
-};
-
-#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
-
-
-METHODDEF(void *)
-alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "small" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr hdr_ptr, prev_hdr_ptr;
-  size_t odd_bytes, min_request, slop;
-  char * data_ptr;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(small_pool_hdr))
-    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* See if space is available in any existing pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-  prev_hdr_ptr = NULL;
-  hdr_ptr = mem->small_list[pool_id];
-  while (hdr_ptr != NULL) {
-    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
-      break;			/* found pool with enough space */
-    prev_hdr_ptr = hdr_ptr;
-    hdr_ptr = hdr_ptr->hdr.next;
-  }
-
-  /* Time to make a new pool? */
-  if (hdr_ptr == NULL) {
-    /* min_request is what we need now, slop is what will be leftover */
-    min_request = sizeofobject + SIZEOF(small_pool_hdr);
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      slop = first_pool_slop[pool_id];
-    else
-      slop = extra_pool_slop[pool_id];
-    /* Don't ask for more than MAX_ALLOC_CHUNK */
-    if (slop > (size_t) MAX_ALLOC_CHUNK - min_request)
-      slop = (size_t) MAX_ALLOC_CHUNK - min_request;
-    /* Try to get space, if fail reduce slop and try again */
-    for (;;) {
-      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
-      if (hdr_ptr != NULL)
-	break;
-      slop /= 2;
-      if (slop < MIN_SLOP)	/* give up when it gets real small */
-	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
-    }
-    mem->total_space_allocated += min_request + slop;
-    /* Success, initialize the new pool header and add to end of list */
-    hdr_ptr->hdr.next = NULL;
-    hdr_ptr->hdr.bytes_used = 0;
-    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      mem->small_list[pool_id] = hdr_ptr;
-    else
-      prev_hdr_ptr->hdr.next = hdr_ptr;
-  }
-
-  /* OK, allocate the object from the current pool */
-  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
-  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
-  hdr_ptr->hdr.bytes_used += sizeofobject;
-  hdr_ptr->hdr.bytes_left -= sizeofobject;
-
-  return (void *) data_ptr;
-}
-
-
-/*
- * Allocation of "large" objects.
- *
- * The external semantics of these are the same as "small" objects,
- * except that FAR pointers are used on 80x86.  However the pool
- * management heuristics are quite different.  We assume that each
- * request is large enough that it may as well be passed directly to
- * jpeg_get_large; the pool management just links everything together
- * so that we can free it all on demand.
- * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
- * structures.  The routines that create these structures (see below)
- * deliberately bunch rows together to ensure a large request size.
- */
-
-METHODDEF(void FAR *)
-alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "large" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  large_pool_ptr hdr_ptr;
-  size_t odd_bytes;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr))
-    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* Always make a new pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
-					    SIZEOF(large_pool_hdr));
-  if (hdr_ptr == NULL)
-    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
-  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
-
-  /* Success, initialize the new pool header and add to list */
-  hdr_ptr->hdr.next = mem->large_list[pool_id];
-  /* We maintain space counts in each pool header for statistical purposes,
-   * even though they are not needed for allocation.
-   */
-  hdr_ptr->hdr.bytes_used = sizeofobject;
-  hdr_ptr->hdr.bytes_left = 0;
-  mem->large_list[pool_id] = hdr_ptr;
-
-  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
-}
-
-
-/*
- * Creation of 2-D sample arrays.
- * The pointers are in near heap, the samples themselves in FAR heap.
- *
- * To minimize allocation overhead and to allow I/O of large contiguous
- * blocks, we allocate the sample rows in groups of as many rows as possible
- * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
- * NB: the virtual array control routines, later in this file, know about
- * this chunking of rows.  The rowsperchunk value is left in the mem manager
- * object so that it can be saved away if this sarray is the workspace for
- * a virtual array.
- */
-
-METHODDEF(JSAMPARRAY)
-alloc_sarray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION samplesperrow, JDIMENSION numrows)
-/* Allocate a 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JSAMPARRAY result;
-  JSAMPROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
-	  ((long) samplesperrow * SIZEOF(JSAMPLE));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
-				    (size_t) numrows * SIZEOF(JSAMPROW));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
-      (size_t) rowsperchunk * (size_t) samplesperrow * SIZEOF(JSAMPLE));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += samplesperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * Creation of 2-D coefficient-block arrays.
- * This is essentially the same as the code for sample arrays, above.
- */
-
-METHODDEF(JBLOCKARRAY)
-alloc_barray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION blocksperrow, JDIMENSION numrows)
-/* Allocate a 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JBLOCKARRAY result;
-  JBLOCKROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
-	  ((long) blocksperrow * SIZEOF(JBLOCK));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
-				     (size_t) numrows * SIZEOF(JBLOCKROW));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
-      (size_t) rowsperchunk * (size_t) blocksperrow * SIZEOF(JBLOCK));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += blocksperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * About virtual array management:
- *
- * The above "normal" array routines are only used to allocate strip buffers
- * (as wide as the image, but just a few rows high).  Full-image-sized buffers
- * are handled as "virtual" arrays.  The array is still accessed a strip at a
- * time, but the memory manager must save the whole array for repeated
- * accesses.  The intended implementation is that there is a strip buffer in
- * memory (as high as is possible given the desired memory limit), plus a
- * backing file that holds the rest of the array.
- *
- * The request_virt_array routines are told the total size of the image and
- * the maximum number of rows that will be accessed at once.  The in-memory
- * buffer must be at least as large as the maxaccess value.
- *
- * The request routines create control blocks but not the in-memory buffers.
- * That is postponed until realize_virt_arrays is called.  At that time the
- * total amount of space needed is known (approximately, anyway), so free
- * memory can be divided up fairly.
- *
- * The access_virt_array routines are responsible for making a specific strip
- * area accessible (after reading or writing the backing file, if necessary).
- * Note that the access routines are told whether the caller intends to modify
- * the accessed strip; during a read-only pass this saves having to rewrite
- * data to disk.  The access routines are also responsible for pre-zeroing
- * any newly accessed rows, if pre-zeroing was requested.
- *
- * In current usage, the access requests are usually for nonoverlapping
- * strips; that is, successive access start_row numbers differ by exactly
- * num_rows = maxaccess.  This means we can get good performance with simple
- * buffer dump/reload logic, by making the in-memory buffer be a multiple
- * of the access height; then there will never be accesses across bufferload
- * boundaries.  The code will still work with overlapping access requests,
- * but it doesn't handle bufferload overlaps very efficiently.
- */
-
-
-METHODDEF(jvirt_sarray_ptr)
-request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION samplesperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_sarray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_sarray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->samplesperrow = samplesperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
-  mem->virt_sarray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(jvirt_barray_ptr)
-request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION blocksperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_barray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_barray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->blocksperrow = blocksperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
-  mem->virt_barray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(void)
-realize_virt_arrays (j_common_ptr cinfo)
-/* Allocate the in-memory buffers for any unrealized virtual arrays */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  long bytesperrow, space_per_minheight, maximum_space;
-  long avail_mem, minheights, max_minheights;
-  jvirt_sarray_ptr sptr;
-  jvirt_barray_ptr bptr;
-
-  /* Compute the minimum space needed (maxaccess rows in each buffer)
-   * and the maximum space needed (full image height in each buffer).
-   * These may be of use to the system-dependent jpeg_mem_available routine.
-   */
-  space_per_minheight = 0;
-  maximum_space = 0;
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      bytesperrow = (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
-      space_per_minheight += (long) sptr->maxaccess * bytesperrow;
-      maximum_space += (long) sptr->rows_in_array * bytesperrow;
-    }
-  }
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      bytesperrow = (long) bptr->blocksperrow * SIZEOF(JBLOCK);
-      space_per_minheight += (long) bptr->maxaccess * bytesperrow;
-      maximum_space += (long) bptr->rows_in_array * bytesperrow;
-    }
-  }
-
-  if (space_per_minheight <= 0)
-    return;			/* no unrealized arrays, no work */
-
-  /* Determine amount of memory to actually use; this is system-dependent. */
-  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
-				 (long) mem->total_space_allocated);
-
-  /* If the maximum space needed is available, make all the buffers full
-   * height; otherwise parcel it out with the same number of minheights
-   * in each buffer.
-   */
-  if (avail_mem >= maximum_space)
-    max_minheights = 1000000000L;
-  else {
-    max_minheights = avail_mem / space_per_minheight;
-    /* If there doesn't seem to be enough space, try to get the minimum
-     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
-     */
-    if (max_minheights <= 0)
-      max_minheights = 1;
-  }
-
-  /* Allocate the in-memory buffers and initialize backing store as needed. */
-
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	sptr->rows_in_mem = sptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
-				(long) sptr->rows_in_array *
-				(long) sptr->samplesperrow *
-				(long) SIZEOF(JSAMPLE));
-	sptr->b_s_open = TRUE;
-      }
-      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
-				      sptr->samplesperrow, sptr->rows_in_mem);
-      sptr->rowsperchunk = mem->last_rowsperchunk;
-      sptr->cur_start_row = 0;
-      sptr->first_undef_row = 0;
-      sptr->dirty = FALSE;
-    }
-  }
-
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	bptr->rows_in_mem = bptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
-				(long) bptr->rows_in_array *
-				(long) bptr->blocksperrow *
-				(long) SIZEOF(JBLOCK));
-	bptr->b_s_open = TRUE;
-      }
-      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
-				      bptr->blocksperrow, bptr->rows_in_mem);
-      bptr->rowsperchunk = mem->last_rowsperchunk;
-      bptr->cur_start_row = 0;
-      bptr->first_undef_row = 0;
-      bptr->dirty = FALSE;
-    }
-  }
-}
-
-
-LOCAL(void)
-do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual sample array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
-  file_offset = (long) ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-LOCAL(void)
-do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual coefficient-block array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
-  file_offset = (long) ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-METHODDEF(JSAMPARRAY)
-access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual sample array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_sarray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_sarray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-METHODDEF(JBLOCKARRAY)
-access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual block array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_barray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_barray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-/*
- * Release all objects belonging to a specified pool.
- */
-
-METHODDEF(void)
-free_pool (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-  size_t space_freed;
-
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-#ifdef MEM_STATS
-  if (cinfo->err->trace_level > 1)
-    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
-#endif
-
-  /* If freeing IMAGE pool, close any virtual arrays first */
-  if (pool_id == JPOOL_IMAGE) {
-    jvirt_sarray_ptr sptr;
-    jvirt_barray_ptr bptr;
-
-    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-      if (sptr->b_s_open) {	/* there may be no backing store */
-	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
-      }
-    }
-    mem->virt_sarray_list = NULL;
-    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-      if (bptr->b_s_open) {	/* there may be no backing store */
-	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
-      }
-    }
-    mem->virt_barray_list = NULL;
-  }
-
-  /* Release large objects */
-  lhdr_ptr = mem->large_list[pool_id];
-  mem->large_list[pool_id] = NULL;
-
-  while (lhdr_ptr != NULL) {
-    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
-    space_freed = lhdr_ptr->hdr.bytes_used +
-		  lhdr_ptr->hdr.bytes_left +
-		  SIZEOF(large_pool_hdr);
-    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    lhdr_ptr = next_lhdr_ptr;
-  }
-
-  /* Release small objects */
-  shdr_ptr = mem->small_list[pool_id];
-  mem->small_list[pool_id] = NULL;
-
-  while (shdr_ptr != NULL) {
-    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
-    space_freed = shdr_ptr->hdr.bytes_used +
-		  shdr_ptr->hdr.bytes_left +
-		  SIZEOF(small_pool_hdr);
-    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    shdr_ptr = next_shdr_ptr;
-  }
-}
-
-
-/*
- * Close up shop entirely.
- * Note that this cannot be called unless cinfo->mem is non-NULL.
- */
-
-METHODDEF(void)
-self_destruct (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Close all backing store, release all memory.
-   * Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    free_pool(cinfo, pool);
-  }
-
-  /* Release the memory manager control block too. */
-  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
-  cinfo->mem = NULL;		/* ensures I will be called only once */
-
-  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
-}
-
-
-/*
- * Memory manager initialization.
- * When this is called, only the error manager pointer is valid in cinfo!
- */
-
-GLOBAL(void)
-jinit_memory_mgr (j_common_ptr cinfo)
-{
-  my_mem_ptr mem;
-  long max_to_use;
-  int pool;
-  size_t test_mac;
-
-  cinfo->mem = NULL;		/* for safety if init fails */
-
-  /* Check for configuration errors.
-   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
-   * doesn't reflect any real hardware alignment requirement.
-   * The test is a little tricky: for X>0, X and X-1 have no one-bits
-   * in common if and only if X is a power of 2, ie has only one one-bit.
-   * Some compilers may give an "unreachable code" warning here; ignore it.
-   */
-  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
-  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
-   * a multiple of SIZEOF(ALIGN_TYPE).
-   * Again, an "unreachable code" warning may be ignored here.
-   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
-   */
-  test_mac = (size_t) MAX_ALLOC_CHUNK;
-  if ((long) test_mac != MAX_ALLOC_CHUNK ||
-      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
-
-  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
-
-  /* Attempt to allocate memory manager's control block */
-  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
-
-  if (mem == NULL) {
-    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
-  }
-
-  /* OK, fill in the method pointers */
-  mem->pub.alloc_small = alloc_small;
-  mem->pub.alloc_large = alloc_large;
-  mem->pub.alloc_sarray = alloc_sarray;
-  mem->pub.alloc_barray = alloc_barray;
-  mem->pub.request_virt_sarray = request_virt_sarray;
-  mem->pub.request_virt_barray = request_virt_barray;
-  mem->pub.realize_virt_arrays = realize_virt_arrays;
-  mem->pub.access_virt_sarray = access_virt_sarray;
-  mem->pub.access_virt_barray = access_virt_barray;
-  mem->pub.free_pool = free_pool;
-  mem->pub.self_destruct = self_destruct;
-
-  /* Make MAX_ALLOC_CHUNK accessible to other modules */
-  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
-
-  /* Initialize working state */
-  mem->pub.max_memory_to_use = max_to_use;
-
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    mem->small_list[pool] = NULL;
-    mem->large_list[pool] = NULL;
-  }
-  mem->virt_sarray_list = NULL;
-  mem->virt_barray_list = NULL;
-
-  mem->total_space_allocated = SIZEOF(my_memory_mgr);
-
-  /* Declare ourselves open for business */
-  cinfo->mem = &mem->pub;
-
-  /* Check for an environment variable JPEGMEM; if found, override the
-   * default max_memory setting from jpeg_mem_init.  Note that the
-   * surrounding application may again override this value.
-   * If your system doesn't support getenv(), define NO_GETENV to disable
-   * this feature.
-   */
-#ifndef NO_GETENV
-  { char * memenv;
-
-    if ((memenv = getenv("JPEGMEM")) != NULL) {
-      char ch = 'x';
-
-      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
-	if (ch == 'm' || ch == 'M')
-	  max_to_use *= 1000L;
-	mem->pub.max_memory_to_use = max_to_use * 1000L;
-      }
-    }
-  }
-#endif
-
-}
diff --git a/dep/libjpeg/src/jmemnobs.c b/dep/libjpeg/src/jmemnobs.c
deleted file mode 100644
index a364cd822..000000000
--- a/dep/libjpeg/src/jmemnobs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * jmemnobs.c
- *
- * Copyright (C) 1992-1996, Thomas G. Lane.
- * Modified 2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a really simple implementation of the system-
- * dependent portion of the JPEG memory manager.  This implementation
- * assumes that no backing-store files are needed: all required space
- * can be obtained from malloc().
- * This is very portable in the sense that it'll compile on almost anything,
- * but you'd better have lots of main memory (or virtual memory) if you want
- * to process big images.
- * Note that the max_memory_to_use option is respected by this implementation.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- */
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  if (cinfo->mem->max_memory_to_use)
-    return cinfo->mem->max_memory_to_use - already_allocated;
-
-  /* Here we say, "we got all you want bud!" */
-  return max_bytes_needed;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Since jpeg_mem_available always promised the moon,
- * this should never be called and we can just error out.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  Here, there isn't any.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  return 0;			/* just set max_memory_to_use to 0 */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/dep/libjpeg/src/jmemsys.h b/dep/libjpeg/src/jmemsys.h
deleted file mode 100644
index 6c3c6d348..000000000
--- a/dep/libjpeg/src/jmemsys.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * jmemsys.h
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file defines the interface between the system-independent
- * and system-dependent portions of the JPEG memory manager.  No other
- * modules need include it.  (The system-independent portion is jmemmgr.c;
- * there are several different versions of the system-dependent portion.)
- *
- * This file works as-is for the system-dependent memory managers supplied
- * in the IJG distribution.  You may need to modify it if you write a
- * custom memory manager.  If system-dependent changes are needed in
- * this file, the best method is to #ifdef them based on a configuration
- * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
- * and USE_MAC_MEMMGR.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_get_small		jGetSmall
-#define jpeg_free_small		jFreeSmall
-#define jpeg_get_large		jGetLarge
-#define jpeg_free_large		jFreeLarge
-#define jpeg_mem_available	jMemAvail
-#define jpeg_open_backing_store	jOpenBackStore
-#define jpeg_mem_init		jMemInit
-#define jpeg_mem_term		jMemTerm
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * These two functions are used to allocate and release small chunks of
- * memory.  (Typically the total amount requested through jpeg_get_small is
- * no more than 20K or so; this will be requested in chunks of a few K each.)
- * Behavior should be the same as for the standard library functions malloc
- * and free; in particular, jpeg_get_small must return NULL on failure.
- * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
- * size of the object being freed, just in case it's needed.
- * On an 80x86 machine using small-data memory model, these manage near heap.
- */
-
-EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
-EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
-				  size_t sizeofobject));
-
-/*
- * These two functions are used to allocate and release large chunks of
- * memory (up to the total free space designated by jpeg_mem_available).
- * The interface is the same as above, except that on an 80x86 machine,
- * far pointers are used.  On most other machines these are identical to
- * the jpeg_get/free_small routines; but we keep them separate anyway,
- * in case a different allocation strategy is desirable for large chunks.
- */
-
-EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
-				       size_t sizeofobject));
-EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
-				  size_t sizeofobject));
-
-/*
- * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
- * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
- * matter, but that case should never come into play).  This macro is needed
- * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
- * On those machines, we expect that jconfig.h will provide a proper value.
- * On machines with 32-bit flat address spaces, any large constant may be used.
- *
- * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
- * size_t and will be a multiple of sizeof(align_type).
- */
-
-#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
-#define MAX_ALLOC_CHUNK  1000000000L
-#endif
-
-/*
- * This routine computes the total space still available for allocation by
- * jpeg_get_large.  If more space than this is needed, backing store will be
- * used.  NOTE: any memory already allocated must not be counted.
- *
- * There is a minimum space requirement, corresponding to the minimum
- * feasible buffer sizes; jmemmgr.c will request that much space even if
- * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
- * all working storage in memory, is also passed in case it is useful.
- * Finally, the total space already allocated is passed.  If no better
- * method is available, cinfo->mem->max_memory_to_use - already_allocated
- * is often a suitable calculation.
- *
- * It is OK for jpeg_mem_available to underestimate the space available
- * (that'll just lead to more backing-store access than is really necessary).
- * However, an overestimate will lead to failure.  Hence it's wise to subtract
- * a slop factor from the true available space.  5% should be enough.
- *
- * On machines with lots of virtual memory, any large constant may be returned.
- * Conversely, zero may be returned to always use the minimum amount of memory.
- */
-
-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
-				     long min_bytes_needed,
-				     long max_bytes_needed,
-				     long already_allocated));
-
-
-/*
- * This structure holds whatever state is needed to access a single
- * backing-store object.  The read/write/close method pointers are called
- * by jmemmgr.c to manipulate the backing-store object; all other fields
- * are private to the system-dependent backing store routines.
- */
-
-#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
-
-
-#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
-
-typedef unsigned short XMSH;	/* type of extended-memory handles */
-typedef unsigned short EMSH;	/* type of expanded-memory handles */
-
-typedef union {
-  short file_handle;		/* DOS file handle if it's a temp file */
-  XMSH xms_handle;		/* handle if it's a chunk of XMS */
-  EMSH ems_handle;		/* handle if it's a chunk of EMS */
-} handle_union;
-
-#endif /* USE_MSDOS_MEMMGR */
-
-#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
-#include <Files.h>
-#endif /* USE_MAC_MEMMGR */
-
-
-typedef struct backing_store_struct * backing_store_ptr;
-
-typedef struct backing_store_struct {
-  /* Methods for reading/writing/closing this backing-store object */
-  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
-				     backing_store_ptr info,
-				     void FAR * buffer_address,
-				     long file_offset, long byte_count));
-  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info,
-				      void FAR * buffer_address,
-				      long file_offset, long byte_count));
-  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info));
-
-  /* Private fields for system-dependent backing-store management */
-#ifdef USE_MSDOS_MEMMGR
-  /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;		/* reference to backing-store storage object */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-#ifdef USE_MAC_MEMMGR
-  /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;		/* file reference number to temp file */
-  FSSpec tempSpec;		/* the FSSpec for the temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-  /* For a typical implementation with temp files, we need: */
-  FILE * temp_file;		/* stdio reference to temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
-#endif
-#endif
-} backing_store_info;
-
-
-/*
- * Initial opening of a backing-store object.  This must fill in the
- * read/write/close pointers in the object.  The read/write routines
- * may take an error exit if the specified maximum file size is exceeded.
- * (If jpeg_mem_available always returns a large value, this routine can
- * just take an error exit.)
- */
-
-EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
-					  backing_store_ptr info,
-					  long total_bytes_needed));
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  jpeg_mem_init will be called before anything is
- * allocated (and, therefore, nothing in cinfo is of use except the error
- * manager pointer).  It should return a suitable default value for
- * max_memory_to_use; this may subsequently be overridden by the surrounding
- * application.  (Note that max_memory_to_use is only important if
- * jpeg_mem_available chooses to consult it ... no one else will.)
- * jpeg_mem_term may assume that all requested memory has been freed and that
- * all opened backing-store objects have been closed.
- */
-
-EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/dep/libjpeg/src/jquant1.c b/dep/libjpeg/src/jquant1.c
deleted file mode 100644
index 60b1843e7..000000000
--- a/dep/libjpeg/src/jquant1.c
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * jquant1.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 1-pass color quantization (color mapping) routines.
- * These routines provide mapping to a fixed color map using equally spaced
- * color values.  Optional Floyd-Steinberg or ordered dithering is available.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_1PASS_SUPPORTED
-
-
-/*
- * The main purpose of 1-pass quantization is to provide a fast, if not very
- * high quality, colormapped output capability.  A 2-pass quantizer usually
- * gives better visual quality; however, for quantized grayscale output this
- * quantizer is perfectly adequate.  Dithering is highly recommended with this
- * quantizer, though you can turn it off if you really want to.
- *
- * In 1-pass quantization the colormap must be chosen in advance of seeing the
- * image.  We use a map consisting of all combinations of Ncolors[i] color
- * values for the i'th component.  The Ncolors[] values are chosen so that
- * their product, the total number of colors, is no more than that requested.
- * (In most cases, the product will be somewhat less.)
- *
- * Since the colormap is orthogonal, the representative value for each color
- * component can be determined without considering the other components;
- * then these indexes can be combined into a colormap index by a standard
- * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
- * can be precalculated and stored in the lookup table colorindex[].
- * colorindex[i][j] maps pixel value j in component i to the nearest
- * representative value (grid plane) for that component; this index is
- * multiplied by the array stride for component i, so that the
- * index of the colormap entry closest to a given pixel value is just
- *    sum( colorindex[component-number][pixel-component-value] )
- * Aside from being fast, this scheme allows for variable spacing between
- * representative values with no additional lookup cost.
- *
- * If gamma correction has been applied in color conversion, it might be wise
- * to adjust the color grid spacing so that the representative colors are
- * equidistant in linear space.  At this writing, gamma correction is not
- * implemented by jdcolor, so nothing is done here.
- */
-
-
-/* Declarations for ordered dithering.
- *
- * We use a standard 16x16 ordered dither array.  The basic concept of ordered
- * dithering is described in many references, for instance Dale Schumacher's
- * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
- * In place of Schumacher's comparisons against a "threshold" value, we add a
- * "dither" value to the input pixel and then round the result to the nearest
- * output value.  The dither value is equivalent to (0.5 - threshold) times
- * the distance between output values.  For ordered dithering, we assume that
- * the output colors are equally spaced; if not, results will probably be
- * worse, since the dither may be too much or too little at a given point.
- *
- * The normal calculation would be to form pixel value + dither, range-limit
- * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
- * We can skip the separate range-limiting step by extending the colorindex
- * table in both directions.
- */
-
-#define ODITHER_SIZE  16	/* dimension of dither matrix */
-/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
-#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
-#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
-
-typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
-typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
-
-static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
-  /* Bayer's order-4 dither array.  Generated by the code given in
-   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
-   * The values in this array must range from 0 to ODITHER_CELLS-1.
-   */
-  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
-  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
-  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
-  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
-  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
-  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
-  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
-  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
-  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
-  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
-  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
-  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
-  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
-  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
-  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
-  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
-};
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array is indexed [component#][position].
- * We provide (#columns + 2) entries per component; the extra entry at each
- * end saves us from special-casing the first and last pixels.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-#define MAX_Q_COMPS 4		/* max components I can handle */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
-  int sv_actual;		/* number of entries in use */
-
-  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
-  /* colorindex[i][j] = index of color closest to pixel value j in component i,
-   * premultiplied as described above.  Since colormap indexes must fit into
-   * JSAMPLEs, the entries of this array will too.
-   */
-  boolean is_padded;		/* is the colorindex padded for odither? */
-
-  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
-
-  /* Variables for ordered dithering */
-  int row_index;		/* cur row's vertical index in dither matrix */
-  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Policy-making subroutines for create_colormap and create_colorindex.
- * These routines determine the colormap to be used.  The rest of the module
- * only assumes that the colormap is orthogonal.
- *
- *  * select_ncolors decides how to divvy up the available colors
- *    among the components.
- *  * output_value defines the set of representative values for a component.
- *  * largest_input_value defines the mapping from input values to
- *    representative values for a component.
- * Note that the latter two routines may impose different policies for
- * different components, though this is not currently done.
- */
-
-
-LOCAL(int)
-select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
-/* Determine allocation of desired colors to components, */
-/* and fill in Ncolors[] array to indicate choice. */
-/* Return value is total number of colors (product of Ncolors[] values). */
-{
-  int nc = cinfo->out_color_components; /* number of color components */
-  int max_colors = cinfo->desired_number_of_colors;
-  int total_colors, iroot, i, j;
-  boolean changed;
-  long temp;
-  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
-
-  /* We can allocate at least the nc'th root of max_colors per component. */
-  /* Compute floor(nc'th root of max_colors). */
-  iroot = 1;
-  do {
-    iroot++;
-    temp = iroot;		/* set temp = iroot ** nc */
-    for (i = 1; i < nc; i++)
-      temp *= iroot;
-  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
-  iroot--;			/* now iroot = floor(root) */
-
-  /* Must have at least 2 color values per component */
-  if (iroot < 2)
-    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
-
-  /* Initialize to iroot color values for each component */
-  total_colors = 1;
-  for (i = 0; i < nc; i++) {
-    Ncolors[i] = iroot;
-    total_colors *= iroot;
-  }
-  /* We may be able to increment the count for one or more components without
-   * exceeding max_colors, though we know not all can be incremented.
-   * Sometimes, the first component can be incremented more than once!
-   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
-   * In RGB colorspace, try to increment G first, then R, then B.
-   */
-  do {
-    changed = FALSE;
-    for (i = 0; i < nc; i++) {
-      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
-      /* calculate new total_colors if Ncolors[j] is incremented */
-      temp = total_colors / Ncolors[j];
-      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
-      if (temp > (long) max_colors)
-	break;			/* won't fit, done with this pass */
-      Ncolors[j]++;		/* OK, apply the increment */
-      total_colors = (int) temp;
-      changed = TRUE;
-    }
-  } while (changed);
-
-  return total_colors;
-}
-
-
-LOCAL(int)
-output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return j'th output value, where j will range from 0 to maxj */
-/* The output values must fall in 0..MAXJSAMPLE in increasing order */
-{
-  /* We always provide values 0 and MAXJSAMPLE for each component;
-   * any additional values are equally spaced between these limits.
-   * (Forcing the upper and lower values to the limits ensures that
-   * dithering can't produce a color outside the selected gamut.)
-   */
-  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
-}
-
-
-LOCAL(int)
-largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return largest input value that should map to j'th output value */
-/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
-{
-  /* Breakpoints are halfway between values returned by output_value */
-  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
-}
-
-
-/*
- * Create the colormap.
- */
-
-LOCAL(void)
-create_colormap (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colormap;		/* Created colormap */
-  int total_colors;		/* Number of distinct output colors */
-  int i,j,k, nci, blksize, blkdist, ptr, val;
-
-  /* Select number of colors for each component */
-  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
-
-  /* Report selected color counts */
-  if (cinfo->out_color_components == 3)
-    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
-	     total_colors, cquantize->Ncolors[0],
-	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
-  else
-    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
-
-  /* Allocate and fill in the colormap. */
-  /* The colors are ordered in the map in standard row-major order, */
-  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
-
-  colormap = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  /* blkdist is distance between groups of identical entries for a component */
-  blkdist = total_colors;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colormap entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blkdist / nci;
-    for (j = 0; j < nci; j++) {
-      /* Compute j'th output value (out of nci) for component */
-      val = output_value(cinfo, i, j, nci-1);
-      /* Fill in all colormap entries that have this value of this component */
-      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
-	/* fill in blksize entries beginning at ptr */
-	for (k = 0; k < blksize; k++)
-	  colormap[i][ptr+k] = (JSAMPLE) val;
-      }
-    }
-    blkdist = blksize;		/* blksize of this color is blkdist of next */
-  }
-
-  /* Save the colormap in private storage,
-   * where it will survive color quantization mode changes.
-   */
-  cquantize->sv_colormap = colormap;
-  cquantize->sv_actual = total_colors;
-}
-
-
-/*
- * Create the color index table.
- */
-
-LOCAL(void)
-create_colorindex (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPROW indexptr;
-  int i,j,k, nci, blksize, val, pad;
-
-  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
-   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
-   * This is not necessary in the other dithering modes.  However, we
-   * flag whether it was done in case user changes dithering mode.
-   */
-  if (cinfo->dither_mode == JDITHER_ORDERED) {
-    pad = MAXJSAMPLE*2;
-    cquantize->is_padded = TRUE;
-  } else {
-    pad = 0;
-    cquantize->is_padded = FALSE;
-  }
-
-  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) (MAXJSAMPLE+1 + pad),
-     (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  blksize = cquantize->sv_actual;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colorindex entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blksize / nci;
-
-    /* adjust colorindex pointers to provide padding at negative indexes. */
-    if (pad)
-      cquantize->colorindex[i] += MAXJSAMPLE;
-
-    /* in loop, val = index of current output value, */
-    /* and k = largest j that maps to current val */
-    indexptr = cquantize->colorindex[i];
-    val = 0;
-    k = largest_input_value(cinfo, i, 0, nci-1);
-    for (j = 0; j <= MAXJSAMPLE; j++) {
-      while (j > k)		/* advance val if past boundary */
-	k = largest_input_value(cinfo, i, ++val, nci-1);
-      /* premultiply so that no multiplication needed in main processing */
-      indexptr[j] = (JSAMPLE) (val * blksize);
-    }
-    /* Pad at both ends if necessary */
-    if (pad)
-      for (j = 1; j <= MAXJSAMPLE; j++) {
-	indexptr[-j] = indexptr[0];
-	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
-      }
-  }
-}
-
-
-/*
- * Create an ordered-dither array for a component having ncolors
- * distinct output values.
- */
-
-LOCAL(ODITHER_MATRIX_PTR)
-make_odither_array (j_decompress_ptr cinfo, int ncolors)
-{
-  ODITHER_MATRIX_PTR odither;
-  int j,k;
-  INT32 num,den;
-
-  odither = (ODITHER_MATRIX_PTR) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(ODITHER_MATRIX));
-  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
-   * Hence the dither value for the matrix cell with fill order f
-   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
-   * On 16-bit-int machine, be careful to avoid overflow.
-   */
-  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
-  for (j = 0; j < ODITHER_SIZE; j++) {
-    for (k = 0; k < ODITHER_SIZE; k++) {
-      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
-	    * MAXJSAMPLE;
-      /* Ensure round towards zero despite C's lack of consistency
-       * about rounding negative values in integer division...
-       */
-      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
-    }
-  }
-  return odither;
-}
-
-
-/*
- * Create the ordered-dither tables.
- * Components having the same number of representative colors may 
- * share a dither table.
- */
-
-LOCAL(void)
-create_odither_tables (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  ODITHER_MATRIX_PTR odither;
-  int i, j, nci;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    odither = NULL;		/* search for matching prior component */
-    for (j = 0; j < i; j++) {
-      if (nci == cquantize->Ncolors[j]) {
-	odither = cquantize->odither[j];
-	break;
-      }
-    }
-    if (odither == NULL)	/* need a new table? */
-      odither = make_odither_array(cinfo, nci);
-    cquantize->odither[i] = odither;
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		JSAMPARRAY output_buf, int num_rows)
-/* General case, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colorindex = cquantize->colorindex;
-  register int pixcode, ci;
-  register JSAMPROW ptrin, ptrout;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  register int nc = cinfo->out_color_components;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode = 0;
-      for (ci = 0; ci < nc; ci++) {
-	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
-      }
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		 JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW ptrin, ptrout;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		     JSAMPARRAY output_buf, int num_rows)
-/* General case, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  int * dither;			/* points to active row of dither matrix */
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int nc = cinfo->out_color_components;
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
-    row_index = cquantize->row_index;
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      colorindex_ci = cquantize->colorindex[ci];
-      dither = cquantize->odither[ci][row_index];
-      col_index = 0;
-
-      for (col = width; col > 0; col--) {
-	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
-	 * select output value, accumulate into output code for this pixel.
-	 * Range-limiting need not be done explicitly, as we have extended
-	 * the colorindex table to produce the right answers for out-of-range
-	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
-	 * required amount of padding.
-	 */
-	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
-	input_ptr += nc;
-	output_ptr++;
-	col_index = (col_index + 1) & ODITHER_MASK;
-      }
-    }
-    /* Advance row index for next row */
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		      JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int * dither0;		/* points to active row of dither matrix */
-  int * dither1;
-  int * dither2;
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    row_index = cquantize->row_index;
-    input_ptr = input_buf[row];
-    output_ptr = output_buf[row];
-    dither0 = cquantize->odither[0][row_index];
-    dither1 = cquantize->odither[1][row_index];
-    dither2 = cquantize->odither[2][row_index];
-    col_index = 0;
-
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
-					dither0[col_index]]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
-					dither1[col_index]]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
-					dither2[col_index]]);
-      *output_ptr++ = (JSAMPLE) pixcode;
-      col_index = (col_index + 1) & ODITHER_MASK;
-    }
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		    JSAMPARRAY output_buf, int num_rows)
-/* General case, with Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register LOCFSERROR cur;	/* current error or pixel value */
-  LOCFSERROR belowerr;		/* error for pixel below cur */
-  LOCFSERROR bpreverr;		/* error for below/prev col */
-  LOCFSERROR bnexterr;		/* error for below/next col */
-  LOCFSERROR delta;
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  JSAMPROW colormap_ci;
-  int pixcode;
-  int nc = cinfo->out_color_components;
-  int dir;			/* 1 for left-to-right, -1 for right-to-left */
-  int dirnc;			/* dir * nc */
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      if (cquantize->on_odd_row) {
-	/* work right to left in this row */
-	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
-	output_ptr += width-1;
-	dir = -1;
-	dirnc = -nc;
-	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
-      } else {
-	/* work left to right in this row */
-	dir = 1;
-	dirnc = nc;
-	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
-      }
-      colorindex_ci = cquantize->colorindex[ci];
-      colormap_ci = cquantize->sv_colormap[ci];
-      /* Preset error values: no error propagated to first pixel from left */
-      cur = 0;
-      /* and no error propagated to row below yet */
-      belowerr = bpreverr = 0;
-
-      for (col = width; col > 0; col--) {
-	/* cur holds the error propagated from the previous pixel on the
-	 * current line.  Add the error propagated from the previous line
-	 * to form the complete error correction term for this pixel, and
-	 * round the error term (which is expressed * 16) to an integer.
-	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-	 * for either sign of the error value.
-	 * Note: errorptr points to *previous* column's array entry.
-	 */
-	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-	 * The maximum error is +- MAXJSAMPLE; this sets the required size
-	 * of the range_limit array.
-	 */
-	cur += GETJSAMPLE(*input_ptr);
-	cur = GETJSAMPLE(range_limit[cur]);
-	/* Select output value, accumulate into output code for this pixel */
-	pixcode = GETJSAMPLE(colorindex_ci[cur]);
-	*output_ptr += (JSAMPLE) pixcode;
-	/* Compute actual representation error at this pixel */
-	/* Note: we can do this even though we don't have the final */
-	/* pixel code, because the colormap is orthogonal. */
-	cur -= GETJSAMPLE(colormap_ci[pixcode]);
-	/* Compute error fractions to be propagated to adjacent pixels.
-	 * Add these into the running sums, and simultaneously shift the
-	 * next-line error sums left by 1 column.
-	 */
-	bnexterr = cur;
-	delta = cur * 2;
-	cur += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr + cur);
-	cur += delta;		/* form error * 5 */
-	bpreverr = belowerr + cur;
-	belowerr = bnexterr;
-	cur += delta;		/* form error * 7 */
-	/* At this point cur contains the 7/16 error value to be propagated
-	 * to the next pixel on the current line, and all the errors for the
-	 * next line have been shifted over. We are therefore ready to move on.
-	 */
-	input_ptr += dirnc;	/* advance input ptr to next column */
-	output_ptr += dir;	/* advance output ptr to next column */
-	errorptr += dir;	/* advance errorptr to current column */
-      }
-      /* Post-loop cleanup: we must unload the final error value into the
-       * final fserrors[] entry.  Note we need not unload belowerr because
-       * it is for the dummy column before or after the actual array.
-       */
-      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
-    }
-    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
-  }
-}
-
-
-/*
- * Allocate workspace for Floyd-Steinberg errors.
- */
-
-LOCAL(void)
-alloc_fs_workspace (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    cquantize->fserrors[i] = (FSERRPTR) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-  }
-}
-
-
-/*
- * Initialize for one-pass color quantization.
- */
-
-METHODDEF(void)
-start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  /* Install my colormap. */
-  cinfo->colormap = cquantize->sv_colormap;
-  cinfo->actual_number_of_colors = cquantize->sv_actual;
-
-  /* Initialize for desired dithering mode. */
-  switch (cinfo->dither_mode) {
-  case JDITHER_NONE:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = color_quantize3;
-    else
-      cquantize->pub.color_quantize = color_quantize;
-    break;
-  case JDITHER_ORDERED:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = quantize3_ord_dither;
-    else
-      cquantize->pub.color_quantize = quantize_ord_dither;
-    cquantize->row_index = 0;	/* initialize state for ordered dither */
-    /* If user changed to ordered dither from another mode,
-     * we must recreate the color index table with padding.
-     * This will cost extra space, but probably isn't very likely.
-     */
-    if (! cquantize->is_padded)
-      create_colorindex(cinfo);
-    /* Create ordered-dither tables if we didn't already. */
-    if (cquantize->odither[0] == NULL)
-      create_odither_tables(cinfo);
-    break;
-  case JDITHER_FS:
-    cquantize->pub.color_quantize = quantize_fs_dither;
-    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
-    /* Allocate Floyd-Steinberg workspace if didn't already. */
-    if (cquantize->fserrors[0] == NULL)
-      alloc_fs_workspace(cinfo);
-    /* Initialize the propagated errors to zero. */
-    arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
-    for (i = 0; i < cinfo->out_color_components; i++)
-      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-  }
-}
-
-
-/*
- * Finish up at the end of the pass.
- */
-
-METHODDEF(void)
-finish_pass_1_quant (j_decompress_ptr cinfo)
-{
-  /* no work in 1-pass case */
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- * Shouldn't get to this module!
- */
-
-METHODDEF(void)
-new_color_map_1_quant (j_decompress_ptr cinfo)
-{
-  ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-
-/*
- * Module initialization routine for 1-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_1pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-
-  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
-  cinfo->cquantize = &cquantize->pub;
-  cquantize->pub.start_pass = start_pass_1_quant;
-  cquantize->pub.finish_pass = finish_pass_1_quant;
-  cquantize->pub.new_color_map = new_color_map_1_quant;
-  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
-  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
-
-  /* Make sure my internal arrays won't overflow */
-  if (cinfo->out_color_components > MAX_Q_COMPS)
-    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
-  /* Make sure colormap indexes can be represented by JSAMPLEs */
-  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
-    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
-
-  /* Create the colormap and color index table. */
-  create_colormap(cinfo);
-  create_colorindex(cinfo);
-
-  /* Allocate Floyd-Steinberg workspace now if requested.
-   * We do this now since it is FAR storage and may affect the memory
-   * manager's space calculations.  If the user changes to FS dither
-   * mode in a later pass, we will allocate the space then, and will
-   * possibly overrun the max_memory_to_use setting.
-   */
-  if (cinfo->dither_mode == JDITHER_FS)
-    alloc_fs_workspace(cinfo);
-}
-
-#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/dep/libjpeg/src/jquant2.c b/dep/libjpeg/src/jquant2.c
deleted file mode 100644
index 662b9bcef..000000000
--- a/dep/libjpeg/src/jquant2.c
+++ /dev/null
@@ -1,1311 +0,0 @@
-/*
- * jquant2.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 2-pass color quantization (color mapping) routines.
- * These routines provide selection of a custom color map for an image,
- * followed by mapping of the image to that color map, with optional
- * Floyd-Steinberg dithering.
- * It is also possible to use just the second pass to map to an arbitrary
- * externally-given color map.
- *
- * Note: ordered dithering is not supported, since there isn't any fast
- * way to compute intercolor distances; it's unclear that ordered dither's
- * fundamental assumptions even hold with an irregularly spaced color map.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-
-/*
- * This module implements the well-known Heckbert paradigm for color
- * quantization.  Most of the ideas used here can be traced back to
- * Heckbert's seminal paper
- *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
- *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
- *
- * In the first pass over the image, we accumulate a histogram showing the
- * usage count of each possible color.  To keep the histogram to a reasonable
- * size, we reduce the precision of the input; typical practice is to retain
- * 5 or 6 bits per color, so that 8 or 4 different input values are counted
- * in the same histogram cell.
- *
- * Next, the color-selection step begins with a box representing the whole
- * color space, and repeatedly splits the "largest" remaining box until we
- * have as many boxes as desired colors.  Then the mean color in each
- * remaining box becomes one of the possible output colors.
- * 
- * The second pass over the image maps each input pixel to the closest output
- * color (optionally after applying a Floyd-Steinberg dithering correction).
- * This mapping is logically trivial, but making it go fast enough requires
- * considerable care.
- *
- * Heckbert-style quantizers vary a good deal in their policies for choosing
- * the "largest" box and deciding where to cut it.  The particular policies
- * used here have proved out well in experimental comparisons, but better ones
- * may yet be found.
- *
- * In earlier versions of the IJG code, this module quantized in YCbCr color
- * space, processing the raw upsampled data without a color conversion step.
- * This allowed the color conversion math to be done only once per colormap
- * entry, not once per pixel.  However, that optimization precluded other
- * useful optimizations (such as merging color conversion with upsampling)
- * and it also interfered with desired capabilities such as quantizing to an
- * externally-supplied colormap.  We have therefore abandoned that approach.
- * The present code works in the post-conversion color space, typically RGB.
- *
- * To improve the visual quality of the results, we actually work in scaled
- * RGB space, giving G distances more weight than R, and R in turn more than
- * B.  To do everything in integer math, we must use integer scale factors.
- * The 2/3/1 scale factors used here correspond loosely to the relative
- * weights of the colors in the NTSC grayscale equation.
- * If you want to use this code to quantize a non-RGB color space, you'll
- * probably need to change these scale factors.
- */
-
-#define R_SCALE 2		/* scale R distances by this much */
-#define G_SCALE 3		/* scale G distances by this much */
-#define B_SCALE 1		/* and B by this much */
-
-/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
- * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
- * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
- * you'll get compile errors until you extend this logic.  In that case
- * you'll probably want to tweak the histogram sizes too.
- */
-
-#if RGB_RED == 0
-#define C0_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 0
-#define C0_SCALE B_SCALE
-#endif
-#if RGB_GREEN == 1
-#define C1_SCALE G_SCALE
-#endif
-#if RGB_RED == 2
-#define C2_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 2
-#define C2_SCALE B_SCALE
-#endif
-
-
-/*
- * First we have the histogram data structure and routines for creating it.
- *
- * The number of bits of precision can be adjusted by changing these symbols.
- * We recommend keeping 6 bits for G and 5 each for R and B.
- * If you have plenty of memory and cycles, 6 bits all around gives marginally
- * better results; if you are short of memory, 5 bits all around will save
- * some space but degrade the results.
- * To maintain a fully accurate histogram, we'd need to allocate a "long"
- * (preferably unsigned long) for each cell.  In practice this is overkill;
- * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
- * and clamping those that do overflow to the maximum value will give close-
- * enough results.  This reduces the recommended histogram size from 256Kb
- * to 128Kb, which is a useful savings on PC-class machines.
- * (In the second pass the histogram space is re-used for pixel mapping data;
- * in that capacity, each cell must be able to store zero to the number of
- * desired colors.  16 bits/cell is plenty for that too.)
- * Since the JPEG code is intended to run in small memory model on 80x86
- * machines, we can't just allocate the histogram in one chunk.  Instead
- * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
- * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
- * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
- * on 80x86 machines, the pointer row is in near memory but the actual
- * arrays are in far memory (same arrangement as we use for image arrays).
- */
-
-#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
-
-/* These will do the right thing for either R,G,B or B,G,R color order,
- * but you may not like the results for other color orders.
- */
-#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
-#define HIST_C1_BITS  6		/* bits of precision in G histogram */
-#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
-
-/* Number of elements along histogram axes. */
-#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
-#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
-#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
-
-/* These are the amounts to shift an input value to get a histogram index. */
-#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
-#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
-#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
-
-
-typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
-
-typedef histcell FAR * histptr;	/* for pointers to histogram cells */
-
-typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
-typedef hist2d * hist3d;	/* type for top-level pointer */
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array has (#columns + 2) entries; the extra entry at
- * each end saves us from special-casing the first and last pixels.
- * Each entry is three values long, one value for each color component.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
-  int desired;			/* desired # of colors = size of colormap */
-
-  /* Variables for accumulating image statistics */
-  hist3d histogram;		/* pointer to the histogram */
-
-  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors;		/* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-  int * error_limiter;		/* table for clamping the applied error */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Prescan some rows of pixels.
- * In this module the prescan simply updates the histogram, which has been
- * initialized to zeroes by start_pass.
- * An output_buf parameter is required by the method signature, but no data
- * is actually output (in fact the buffer controller is probably passing a
- * NULL pointer).
- */
-
-METHODDEF(void)
-prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW ptr;
-  register histptr histp;
-  register hist3d histogram = cquantize->histogram;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptr = input_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the histogram */
-      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
-      /* increment, check for overflow and undo increment if so. */
-      if (++(*histp) <= 0)
-	(*histp)--;
-      ptr += 3;
-    }
-  }
-}
-
-
-/*
- * Next we have the really interesting routines: selection of a colormap
- * given the completed histogram.
- * These routines work with a list of "boxes", each representing a rectangular
- * subset of the input color space (to histogram precision).
- */
-
-typedef struct {
-  /* The bounds of the box (inclusive); expressed as histogram indexes */
-  int c0min, c0max;
-  int c1min, c1max;
-  int c2min, c2max;
-  /* The volume (actually 2-norm) of the box */
-  INT32 volume;
-  /* The number of nonzero histogram cells within this box */
-  long colorcount;
-} box;
-
-typedef box * boxptr;
-
-
-LOCAL(boxptr)
-find_biggest_color_pop (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest color population */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register long maxc = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->colorcount > maxc && boxp->volume > 0) {
-      which = boxp;
-      maxc = boxp->colorcount;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(boxptr)
-find_biggest_volume (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest (scaled) volume */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register INT32 maxv = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->volume > maxv) {
-      which = boxp;
-      maxv = boxp->volume;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(void)
-update_box (j_decompress_ptr cinfo, boxptr boxp)
-/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
-/* and recompute its volume and population */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  INT32 dist0,dist1,dist2;
-  long ccount;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  if (c0max > c0min)
-    for (c0 = c0min; c0 <= c0max; c0++)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0min = c0min = c0;
-	    goto have_c0min;
-	  }
-      }
- have_c0min:
-  if (c0max > c0min)
-    for (c0 = c0max; c0 >= c0min; c0--)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0max = c0max = c0;
-	    goto have_c0max;
-	  }
-      }
- have_c0max:
-  if (c1max > c1min)
-    for (c1 = c1min; c1 <= c1max; c1++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1min = c1min = c1;
-	    goto have_c1min;
-	  }
-      }
- have_c1min:
-  if (c1max > c1min)
-    for (c1 = c1max; c1 >= c1min; c1--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1max = c1max = c1;
-	    goto have_c1max;
-	  }
-      }
- have_c1max:
-  if (c2max > c2min)
-    for (c2 = c2min; c2 <= c2max; c2++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2min = c2min = c2;
-	    goto have_c2min;
-	  }
-      }
- have_c2min:
-  if (c2max > c2min)
-    for (c2 = c2max; c2 >= c2min; c2--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2max = c2max = c2;
-	    goto have_c2max;
-	  }
-      }
- have_c2max:
-
-  /* Update box volume.
-   * We use 2-norm rather than real volume here; this biases the method
-   * against making long narrow boxes, and it has the side benefit that
-   * a box is splittable iff norm > 0.
-   * Since the differences are expressed in histogram-cell units,
-   * we have to shift back to JSAMPLE units to get consistent distances;
-   * after which, we scale according to the selected distance scale factors.
-   */
-  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
-  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
-  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
-  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
-  
-  /* Now scan remaining volume of box and compute population */
-  ccount = 0;
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++, histp++)
-	if (*histp != 0) {
-	  ccount++;
-	}
-    }
-  boxp->colorcount = ccount;
-}
-
-
-LOCAL(int)
-median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
-	    int desired_colors)
-/* Repeatedly select and split the largest box until we have enough boxes */
-{
-  int n,lb;
-  int c0,c1,c2,cmax;
-  register boxptr b1,b2;
-
-  while (numboxes < desired_colors) {
-    /* Select box to split.
-     * Current algorithm: by population for first half, then by volume.
-     */
-    if (numboxes*2 <= desired_colors) {
-      b1 = find_biggest_color_pop(boxlist, numboxes);
-    } else {
-      b1 = find_biggest_volume(boxlist, numboxes);
-    }
-    if (b1 == NULL)		/* no splittable boxes left! */
-      break;
-    b2 = &boxlist[numboxes];	/* where new box will go */
-    /* Copy the color bounds to the new box. */
-    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
-    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
-    /* Choose which axis to split the box on.
-     * Current algorithm: longest scaled axis.
-     * See notes in update_box about scaling distances.
-     */
-    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
-    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
-    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
-    /* We want to break any ties in favor of green, then red, blue last.
-     * This code does the right thing for R,G,B or B,G,R color orders only.
-     */
-#if RGB_RED == 0
-    cmax = c1; n = 1;
-    if (c0 > cmax) { cmax = c0; n = 0; }
-    if (c2 > cmax) { n = 2; }
-#else
-    cmax = c1; n = 1;
-    if (c2 > cmax) { cmax = c2; n = 2; }
-    if (c0 > cmax) { n = 0; }
-#endif
-    /* Choose split point along selected axis, and update box bounds.
-     * Current algorithm: split at halfway point.
-     * (Since the box has been shrunk to minimum volume,
-     * any split will produce two nonempty subboxes.)
-     * Note that lb value is max for lower box, so must be < old max.
-     */
-    switch (n) {
-    case 0:
-      lb = (b1->c0max + b1->c0min) / 2;
-      b1->c0max = lb;
-      b2->c0min = lb+1;
-      break;
-    case 1:
-      lb = (b1->c1max + b1->c1min) / 2;
-      b1->c1max = lb;
-      b2->c1min = lb+1;
-      break;
-    case 2:
-      lb = (b1->c2max + b1->c2min) / 2;
-      b1->c2max = lb;
-      b2->c2min = lb+1;
-      break;
-    }
-    /* Update stats for boxes */
-    update_box(cinfo, b1);
-    update_box(cinfo, b2);
-    numboxes++;
-  }
-  return numboxes;
-}
-
-
-LOCAL(void)
-compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
-/* Compute representative color for a box, put it in colormap[icolor] */
-{
-  /* Current algorithm: mean weighted by pixels (not colors) */
-  /* Note it is important to get the rounding correct! */
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  long count;
-  long total = 0;
-  long c0total = 0;
-  long c1total = 0;
-  long c2total = 0;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++) {
-	if ((count = *histp++) != 0) {
-	  total += count;
-	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
-	}
-      }
-    }
-  
-  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
-  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
-  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
-}
-
-
-LOCAL(void)
-select_colors (j_decompress_ptr cinfo, int desired_colors)
-/* Master routine for color selection */
-{
-  boxptr boxlist;
-  int numboxes;
-  int i;
-
-  /* Allocate workspace for box list */
-  boxlist = (boxptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
-  /* Initialize one box containing whole space */
-  numboxes = 1;
-  boxlist[0].c0min = 0;
-  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
-  boxlist[0].c1min = 0;
-  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
-  boxlist[0].c2min = 0;
-  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
-  /* Shrink it to actually-used volume and set its statistics */
-  update_box(cinfo, & boxlist[0]);
-  /* Perform median-cut to produce final box list */
-  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
-  /* Compute the representative color for each box, fill colormap */
-  for (i = 0; i < numboxes; i++)
-    compute_color(cinfo, & boxlist[i], i);
-  cinfo->actual_number_of_colors = numboxes;
-  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
-}
-
-
-/*
- * These routines are concerned with the time-critical task of mapping input
- * colors to the nearest color in the selected colormap.
- *
- * We re-use the histogram space as an "inverse color map", essentially a
- * cache for the results of nearest-color searches.  All colors within a
- * histogram cell will be mapped to the same colormap entry, namely the one
- * closest to the cell's center.  This may not be quite the closest entry to
- * the actual input color, but it's almost as good.  A zero in the cache
- * indicates we haven't found the nearest color for that cell yet; the array
- * is cleared to zeroes before starting the mapping pass.  When we find the
- * nearest color for a cell, its colormap index plus one is recorded in the
- * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
- * when they need to use an unfilled entry in the cache.
- *
- * Our method of efficiently finding nearest colors is based on the "locally
- * sorted search" idea described by Heckbert and on the incremental distance
- * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
- * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
- * the distances from a given colormap entry to each cell of the histogram can
- * be computed quickly using an incremental method: the differences between
- * distances to adjacent cells themselves differ by a constant.  This allows a
- * fairly fast implementation of the "brute force" approach of computing the
- * distance from every colormap entry to every histogram cell.  Unfortunately,
- * it needs a work array to hold the best-distance-so-far for each histogram
- * cell (because the inner loop has to be over cells, not colormap entries).
- * The work array elements have to be INT32s, so the work array would need
- * 256Kb at our recommended precision.  This is not feasible in DOS machines.
- *
- * To get around these problems, we apply Thomas' method to compute the
- * nearest colors for only the cells within a small subbox of the histogram.
- * The work array need be only as big as the subbox, so the memory usage
- * problem is solved.  Furthermore, we need not fill subboxes that are never
- * referenced in pass2; many images use only part of the color gamut, so a
- * fair amount of work is saved.  An additional advantage of this
- * approach is that we can apply Heckbert's locality criterion to quickly
- * eliminate colormap entries that are far away from the subbox; typically
- * three-fourths of the colormap entries are rejected by Heckbert's criterion,
- * and we need not compute their distances to individual cells in the subbox.
- * The speed of this approach is heavily influenced by the subbox size: too
- * small means too much overhead, too big loses because Heckbert's criterion
- * can't eliminate as many colormap entries.  Empirically the best subbox
- * size seems to be about 1/512th of the histogram (1/8th in each direction).
- *
- * Thomas' article also describes a refined method which is asymptotically
- * faster than the brute-force method, but it is also far more complex and
- * cannot efficiently be applied to small subboxes.  It is therefore not
- * useful for programs intended to be portable to DOS machines.  On machines
- * with plenty of memory, filling the whole histogram in one shot with Thomas'
- * refined method might be faster than the present code --- but then again,
- * it might not be any faster, and it's certainly more complicated.
- */
-
-
-/* log2(histogram cells in update box) for each axis; this can be adjusted */
-#define BOX_C0_LOG  (HIST_C0_BITS-3)
-#define BOX_C1_LOG  (HIST_C1_BITS-3)
-#define BOX_C2_LOG  (HIST_C2_BITS-3)
-
-#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
-#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
-#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
-
-#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
-#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
-#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
-
-
-/*
- * The next three routines implement inverse colormap filling.  They could
- * all be folded into one big routine, but splitting them up this way saves
- * some stack space (the mindist[] and bestdist[] arrays need not coexist)
- * and may allow some compilers to produce better code by registerizing more
- * inner-loop variables.
- */
-
-LOCAL(int)
-find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		    JSAMPLE colorlist[])
-/* Locate the colormap entries close enough to an update box to be candidates
- * for the nearest entry to some cell(s) in the update box.  The update box
- * is specified by the center coordinates of its first cell.  The number of
- * candidate colormap entries is returned, and their colormap indexes are
- * placed in colorlist[].
- * This routine uses Heckbert's "locally sorted search" criterion to select
- * the colors that need further consideration.
- */
-{
-  int numcolors = cinfo->actual_number_of_colors;
-  int maxc0, maxc1, maxc2;
-  int centerc0, centerc1, centerc2;
-  int i, x, ncolors;
-  INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
-
-  /* Compute true coordinates of update box's upper corner and center.
-   * Actually we compute the coordinates of the center of the upper-corner
-   * histogram cell, which are the upper bounds of the volume we care about.
-   * Note that since ">>" rounds down, the "center" values may be closer to
-   * min than to max; hence comparisons to them must be "<=", not "<".
-   */
-  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
-  centerc0 = (minc0 + maxc0) >> 1;
-  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
-  centerc1 = (minc1 + maxc1) >> 1;
-  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
-  centerc2 = (minc2 + maxc2) >> 1;
-
-  /* For each color in colormap, find:
-   *  1. its minimum squared-distance to any point in the update box
-   *     (zero if color is within update box);
-   *  2. its maximum squared-distance to any point in the update box.
-   * Both of these can be found by considering only the corners of the box.
-   * We save the minimum distance for each color in mindist[];
-   * only the smallest maximum distance is of interest.
-   */
-  minmaxdist = 0x7FFFFFFFL;
-
-  for (i = 0; i < numcolors; i++) {
-    /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(cinfo->colormap[0][i]);
-    if (x < minc0) {
-      tdist = (x - minc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - maxc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else if (x > maxc0) {
-      tdist = (x - maxc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - minc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      min_dist = 0;
-      if (x <= centerc0) {
-	tdist = (x - maxc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      } else {
-	tdist = (x - minc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[1][i]);
-    if (x < minc1) {
-      tdist = (x - minc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc1) {
-      tdist = (x - maxc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc1) {
-	tdist = (x - maxc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[2][i]);
-    if (x < minc2) {
-      tdist = (x - minc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc2) {
-      tdist = (x - maxc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc2) {
-	tdist = (x - maxc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    mindist[i] = min_dist;	/* save away the results */
-    if (max_dist < minmaxdist)
-      minmaxdist = max_dist;
-  }
-
-  /* Now we know that no cell in the update box is more than minmaxdist
-   * away from some colormap entry.  Therefore, only colors that are
-   * within minmaxdist of some part of the box need be considered.
-   */
-  ncolors = 0;
-  for (i = 0; i < numcolors; i++) {
-    if (mindist[i] <= minmaxdist)
-      colorlist[ncolors++] = (JSAMPLE) i;
-  }
-  return ncolors;
-}
-
-
-LOCAL(void)
-find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
-/* Find the closest colormap entry for each cell in the update box,
- * given the list of candidate colors prepared by find_nearby_colors.
- * Return the indexes of the closest entries in the bestcolor[] array.
- * This routine uses Thomas' incremental distance calculation method to
- * find the distance from a colormap entry to successive cells in the box.
- */
-{
-  int ic0, ic1, ic2;
-  int i, icolor;
-  register INT32 * bptr;	/* pointer into bestdist[] array */
-  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
-  INT32 dist0, dist1;		/* initial distance values */
-  register INT32 dist2;		/* current distance in inner loop */
-  INT32 xx0, xx1;		/* distance increments */
-  register INT32 xx2;
-  INT32 inc0, inc1, inc2;	/* initial values for increments */
-  /* This array holds the distance to the nearest-so-far color for each cell */
-  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Initialize best-distance for each cell of the update box */
-  bptr = bestdist;
-  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
-    *bptr++ = 0x7FFFFFFFL;
-  
-  /* For each color selected by find_nearby_colors,
-   * compute its distance to the center of each cell in the box.
-   * If that's less than best-so-far, update best distance and color number.
-   */
-  
-  /* Nominal steps between cell centers ("x" in Thomas article) */
-#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
-#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
-#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
-  
-  for (i = 0; i < numcolors; i++) {
-    icolor = GETJSAMPLE(colorlist[i]);
-    /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
-    dist0 = inc0*inc0;
-    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
-    dist0 += inc1*inc1;
-    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
-    dist0 += inc2*inc2;
-    /* Form the initial difference increments */
-    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
-    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
-    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
-    /* Now loop over all cells in box, updating distance per Thomas method */
-    bptr = bestdist;
-    cptr = bestcolor;
-    xx0 = inc0;
-    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
-      dist1 = dist0;
-      xx1 = inc1;
-      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
-	dist2 = dist1;
-	xx2 = inc2;
-	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
-	  if (dist2 < *bptr) {
-	    *bptr = dist2;
-	    *cptr = (JSAMPLE) icolor;
-	  }
-	  dist2 += xx2;
-	  xx2 += 2 * STEP_C2 * STEP_C2;
-	  bptr++;
-	  cptr++;
-	}
-	dist1 += xx1;
-	xx1 += 2 * STEP_C1 * STEP_C1;
-      }
-      dist0 += xx0;
-      xx0 += 2 * STEP_C0 * STEP_C0;
-    }
-  }
-}
-
-
-LOCAL(void)
-fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
-/* Fill the inverse-colormap entries in the update box that contains */
-/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
-/* we can fill as many others as we wish.) */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int minc0, minc1, minc2;	/* lower left corner of update box */
-  int ic0, ic1, ic2;
-  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
-  register histptr cachep;	/* pointer into main cache array */
-  /* This array lists the candidate colormap indexes. */
-  JSAMPLE colorlist[MAXNUMCOLORS];
-  int numcolors;		/* number of candidate colors */
-  /* This array holds the actually closest colormap index for each cell. */
-  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Convert cell coordinates to update box ID */
-  c0 >>= BOX_C0_LOG;
-  c1 >>= BOX_C1_LOG;
-  c2 >>= BOX_C2_LOG;
-
-  /* Compute true coordinates of update box's origin corner.
-   * Actually we compute the coordinates of the center of the corner
-   * histogram cell, which are the lower bounds of the volume we care about.
-   */
-  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
-  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
-  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
-  
-  /* Determine which colormap entries are close enough to be candidates
-   * for the nearest entry to some cell in the update box.
-   */
-  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
-
-  /* Determine the actually nearest colors. */
-  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
-		   bestcolor);
-
-  /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
-  c1 <<= BOX_C1_LOG;
-  c2 <<= BOX_C2_LOG;
-  cptr = bestcolor;
-  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
-    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
-      cachep = & histogram[c0+ic0][c1+ic1][c2];
-      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
-      }
-    }
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-pass2_no_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register JSAMPROW inptr, outptr;
-  register histptr cachep;
-  register int c0, c1, c2;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
-      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
-      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
-      cachep = & histogram[c0][c1][c2];
-      /* If we have not seen this color before, find nearest colormap entry */
-      /* and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, c0,c1,c2);
-      /* Now emit the colormap index for this cell */
-      *outptr++ = (JSAMPLE) (*cachep - 1);
-    }
-  }
-}
-
-
-METHODDEF(void)
-pass2_fs_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
-  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
-  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW inptr;		/* => current input pixel */
-  JSAMPROW outptr;		/* => current output pixel */
-  histptr cachep;
-  int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing inptr & errorptr */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  int *error_limit = cquantize->error_limiter;
-  JSAMPROW colormap0 = cinfo->colormap[0];
-  JSAMPROW colormap1 = cinfo->colormap[1];
-  JSAMPROW colormap2 = cinfo->colormap[2];
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    if (cquantize->on_odd_row) {
-      /* work right to left in this row */
-      inptr += (width-1) * 3;	/* so point to rightmost pixel */
-      outptr += width-1;
-      dir = -1;
-      dir3 = -3;
-      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
-      cquantize->on_odd_row = FALSE; /* flip for next time */
-    } else {
-      /* work left to right in this row */
-      dir = 1;
-      dir3 = 3;
-      errorptr = cquantize->fserrors; /* => entry before first real column */
-      cquantize->on_odd_row = TRUE; /* flip for next time */
-    }
-    /* Preset error values: no error propagated to first pixel from left */
-    cur0 = cur1 = cur2 = 0;
-    /* and no error propagated to row below yet */
-    belowerr0 = belowerr1 = belowerr2 = 0;
-    bpreverr0 = bpreverr1 = bpreverr2 = 0;
-
-    for (col = width; col > 0; col--) {
-      /* curN holds the error propagated from the previous pixel on the
-       * current line.  Add the error propagated from the previous line
-       * to form the complete error correction term for this pixel, and
-       * round the error term (which is expressed * 16) to an integer.
-       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-       * for either sign of the error value.
-       * Note: errorptr points to *previous* column's array entry.
-       */
-      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
-      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
-      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
-      /* Limit the error using transfer function set by init_error_limit.
-       * See comments with init_error_limit for rationale.
-       */
-      cur0 = error_limit[cur0];
-      cur1 = error_limit[cur1];
-      cur2 = error_limit[cur2];
-      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
-       * this sets the required size of the range_limit array.
-       */
-      cur0 += GETJSAMPLE(inptr[0]);
-      cur1 += GETJSAMPLE(inptr[1]);
-      cur2 += GETJSAMPLE(inptr[2]);
-      cur0 = GETJSAMPLE(range_limit[cur0]);
-      cur1 = GETJSAMPLE(range_limit[cur1]);
-      cur2 = GETJSAMPLE(range_limit[cur2]);
-      /* Index into the cache with adjusted pixel value */
-      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
-      /* If we have not seen this color before, find nearest colormap */
-      /* entry and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
-      /* Now emit the colormap index for this cell */
-      { register int pixcode = *cachep - 1;
-	*outptr = (JSAMPLE) pixcode;
-	/* Compute representation error for this pixel */
-	cur0 -= GETJSAMPLE(colormap0[pixcode]);
-	cur1 -= GETJSAMPLE(colormap1[pixcode]);
-	cur2 -= GETJSAMPLE(colormap2[pixcode]);
-      }
-      /* Compute error fractions to be propagated to adjacent pixels.
-       * Add these into the running sums, and simultaneously shift the
-       * next-line error sums left by 1 column.
-       */
-      { register LOCFSERROR bnexterr, delta;
-
-	bnexterr = cur0;	/* Process component 0 */
-	delta = cur0 * 2;
-	cur0 += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
-	cur0 += delta;		/* form error * 5 */
-	bpreverr0 = belowerr0 + cur0;
-	belowerr0 = bnexterr;
-	cur0 += delta;		/* form error * 7 */
-	bnexterr = cur1;	/* Process component 1 */
-	delta = cur1 * 2;
-	cur1 += delta;		/* form error * 3 */
-	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
-	cur1 += delta;		/* form error * 5 */
-	bpreverr1 = belowerr1 + cur1;
-	belowerr1 = bnexterr;
-	cur1 += delta;		/* form error * 7 */
-	bnexterr = cur2;	/* Process component 2 */
-	delta = cur2 * 2;
-	cur2 += delta;		/* form error * 3 */
-	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
-	cur2 += delta;		/* form error * 5 */
-	bpreverr2 = belowerr2 + cur2;
-	belowerr2 = bnexterr;
-	cur2 += delta;		/* form error * 7 */
-      }
-      /* At this point curN contains the 7/16 error value to be propagated
-       * to the next pixel on the current line, and all the errors for the
-       * next line have been shifted over.  We are therefore ready to move on.
-       */
-      inptr += dir3;		/* Advance pixel pointers to next column */
-      outptr += dir;
-      errorptr += dir3;		/* advance errorptr to current column */
-    }
-    /* Post-loop cleanup: we must unload the final error values into the
-     * final fserrors[] entry.  Note we need not unload belowerrN because
-     * it is for the dummy column before or after the actual array.
-     */
-    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
-    errorptr[1] = (FSERROR) bpreverr1;
-    errorptr[2] = (FSERROR) bpreverr2;
-  }
-}
-
-
-/*
- * Initialize the error-limiting transfer function (lookup table).
- * The raw F-S error computation can potentially compute error values of up to
- * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
- * much less, otherwise obviously wrong pixels will be created.  (Typical
- * effects include weird fringes at color-area boundaries, isolated bright
- * pixels in a dark area, etc.)  The standard advice for avoiding this problem
- * is to ensure that the "corners" of the color cube are allocated as output
- * colors; then repeated errors in the same direction cannot cause cascading
- * error buildup.  However, that only prevents the error from getting
- * completely out of hand; Aaron Giles reports that error limiting improves
- * the results even with corner colors allocated.
- * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
- * well, but the smoother transfer function used below is even better.  Thanks
- * to Aaron Giles for this idea.
- */
-
-LOCAL(void)
-init_error_limit (j_decompress_ptr cinfo)
-/* Allocate and fill in the error_limiter table */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  int * table;
-  int in, out;
-
-  table = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
-  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
-  cquantize->error_limiter = table;
-
-#define STEPSIZE ((MAXJSAMPLE+1)/16)
-  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
-  out = 0;
-  for (in = 0; in < STEPSIZE; in++, out++) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
-  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
-  for (; in <= MAXJSAMPLE; in++) {
-    table[in] = out; table[-in] = -out;
-  }
-#undef STEPSIZE
-}
-
-
-/*
- * Finish up at the end of each pass.
- */
-
-METHODDEF(void)
-finish_pass1 (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Select the representative colors and fill in cinfo->colormap */
-  cinfo->colormap = cquantize->sv_colormap;
-  select_colors(cinfo, cquantize->desired);
-  /* Force next pass to zero the color index table */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-METHODDEF(void)
-finish_pass2 (j_decompress_ptr cinfo)
-{
-  /* no work */
-}
-
-
-/*
- * Initialize for each processing pass.
- */
-
-METHODDEF(void)
-start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int i;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  if (is_pre_scan) {
-    /* Set up method pointers */
-    cquantize->pub.color_quantize = prescan_quantize;
-    cquantize->pub.finish_pass = finish_pass1;
-    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
-  } else {
-    /* Set up method pointers */
-    if (cinfo->dither_mode == JDITHER_FS)
-      cquantize->pub.color_quantize = pass2_fs_dither;
-    else
-      cquantize->pub.color_quantize = pass2_no_dither;
-    cquantize->pub.finish_pass = finish_pass2;
-
-    /* Make sure color count is acceptable */
-    i = cinfo->actual_number_of_colors;
-    if (i < 1)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
-    if (i > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-
-    if (cinfo->dither_mode == JDITHER_FS) {
-      size_t arraysize = ((size_t) cinfo->output_width + (size_t) 2)
-	* (3 * SIZEOF(FSERROR));
-      /* Allocate Floyd-Steinberg workspace if we didn't already. */
-      if (cquantize->fserrors == NULL)
-	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-      /* Initialize the propagated errors to zero. */
-      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
-      /* Make the error-limit table if we didn't already. */
-      if (cquantize->error_limiter == NULL)
-	init_error_limit(cinfo);
-      cquantize->on_odd_row = FALSE;
-    }
-
-  }
-  /* Zero the histogram or inverse color map, if necessary */
-  if (cquantize->needs_zeroed) {
-    for (i = 0; i < HIST_C0_ELEMS; i++) {
-      FMEMZERO((void FAR *) histogram[i],
-	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-    }
-    cquantize->needs_zeroed = FALSE;
-  }
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-METHODDEF(void)
-new_color_map_2_quant (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Reset the inverse color map */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-/*
- * Module initialization routine for 2-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_2pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-  int i;
-
-  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
-  cinfo->cquantize = &cquantize->pub;
-  cquantize->pub.start_pass = start_pass_2_quant;
-  cquantize->pub.new_color_map = new_color_map_2_quant;
-  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
-  cquantize->error_limiter = NULL;
-
-  /* Make sure jdmaster didn't give me a case I can't handle */
-  if (cinfo->out_color_components != 3)
-    ERREXIT(cinfo, JERR_NOTIMPL);
-
-  /* Allocate the histogram/inverse colormap storage */
-  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
-  for (i = 0; i < HIST_C0_ELEMS; i++) {
-    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-  }
-  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
-
-  /* Allocate storage for the completed colormap, if required.
-   * We do this now since it is FAR storage and may affect
-   * the memory manager's space calculations.
-   */
-  if (cinfo->enable_2pass_quant) {
-    /* Make sure color count is acceptable */
-    int desired = cinfo->desired_number_of_colors;
-    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
-    if (desired < 8)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
-    /* Make sure colormap indexes can be represented by JSAMPLEs */
-    if (desired > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) desired, (JDIMENSION) 3);
-    cquantize->desired = desired;
-  } else
-    cquantize->sv_colormap = NULL;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  /* Allocate Floyd-Steinberg workspace if necessary.
-   * This isn't really needed until pass 2, but again it is FAR storage.
-   * Although we will cope with a later change in dither_mode,
-   * we do not promise to honor max_memory_to_use if dither_mode changes.
-   */
-  if (cinfo->dither_mode == JDITHER_FS) {
-    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       ((size_t) cinfo->output_width + (size_t) 2) * (3 * SIZEOF(FSERROR)));
-    /* Might as well create the error-limiting table too. */
-    init_error_limit(cinfo);
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/dep/libjpeg/src/jutils.c b/dep/libjpeg/src/jutils.c
deleted file mode 100644
index 31e16dfb5..000000000
--- a/dep/libjpeg/src/jutils.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * jutils.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2009-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains tables and miscellaneous utility routines needed
- * for both compression and decompression.
- * Note we prefix all global names with "j" to minimize conflicts with
- * a surrounding application.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
- * of a DCT block read in natural order (left to right, top to bottom).
- */
-
-#if 0				/* This table is not actually needed in v6a */
-
-const int jpeg_zigzag_order[DCTSIZE2] = {
-   0,  1,  5,  6, 14, 15, 27, 28,
-   2,  4,  7, 13, 16, 26, 29, 42,
-   3,  8, 12, 17, 25, 30, 41, 43,
-   9, 11, 18, 24, 31, 40, 44, 53,
-  10, 19, 23, 32, 39, 45, 52, 54,
-  20, 22, 33, 38, 46, 51, 55, 60,
-  21, 34, 37, 47, 50, 56, 59, 61,
-  35, 36, 48, 49, 57, 58, 62, 63
-};
-
-#endif
-
-/*
- * jpeg_natural_order[i] is the natural-order position of the i'th element
- * of zigzag order.
- *
- * When reading corrupted data, the Huffman decoders could attempt
- * to reference an entry beyond the end of this array (if the decoded
- * zero run length reaches past the end of the block).  To prevent
- * wild stores without adding an inner-loop test, we put some extra
- * "63"s after the real entries.  This will cause the extra coefficient
- * to be stored in location 63 of the block, not somewhere random.
- * The worst case would be a run-length of 15, which means we need 16
- * fake entries.
- */
-
-const int jpeg_natural_order[DCTSIZE2+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 48, 41, 34,
-  27, 20, 13,  6,  7, 14, 21, 28,
-  35, 42, 49, 56, 57, 50, 43, 36,
-  29, 22, 15, 23, 30, 37, 44, 51,
-  58, 59, 52, 45, 38, 31, 39, 46,
-  53, 60, 61, 54, 47, 55, 62, 63,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order7[7*7+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 48, 41, 34,
-  27, 20, 13,  6, 14, 21, 28, 35,
-  42, 49, 50, 43, 36, 29, 22, 30,
-  37, 44, 51, 52, 45, 38, 46, 53,
-  54,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order6[6*6+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 41, 34, 27,
-  20, 13, 21, 28, 35, 42, 43, 36,
-  29, 37, 44, 45,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order5[5*5+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4, 12,
-  19, 26, 33, 34, 27, 20, 28, 35,
-  36,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order4[4*4+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 25, 18, 11, 19, 26, 27,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order3[3*3+16] = {
-   0,  1,  8, 16,  9,  2, 10, 17,
-  18,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order2[2*2+16] = {
-   0,  1,  8,  9,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-
-/*
- * Arithmetic utilities
- */
-
-GLOBAL(long)
-jdiv_round_up (long a, long b)
-/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
-/* Assumes a >= 0, b > 0 */
-{
-  return (a + b - 1L) / b;
-}
-
-
-GLOBAL(long)
-jround_up (long a, long b)
-/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
-/* Assumes a >= 0, b > 0 */
-{
-  a += b - 1L;
-  return a - (a % b);
-}
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines below do it the hard way.  (The performance cost
- * is not all that great, because these routines aren't very heavily used.)
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
-#else				/* 80x86 case, define if we can */
-#ifdef USE_FMEM
-#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
-#else
-/* This function is for use by the FMEMZERO macro defined in jpegint.h.
- * Do not call this function directly, use the FMEMZERO macro instead.
- */
-GLOBAL(void)
-jzero_far (void FAR * target, size_t bytestozero)
-/* Zero out a chunk of FAR memory. */
-/* This might be sample-array data, block-array data, or alloc_large data. */
-{
-  register char FAR * ptr = (char FAR *) target;
-  register size_t count;
-
-  for (count = bytestozero; count > 0; count--) {
-    *ptr++ = 0;
-  }
-}
-#endif
-#endif
-
-
-GLOBAL(void)
-jcopy_sample_rows (JSAMPARRAY input_array,
-		   JSAMPARRAY output_array,
-		   int num_rows, JDIMENSION num_cols)
-/* Copy some rows of samples from one place to another.
- * num_rows rows are copied from *input_array++ to *output_array++;
- * these areas may overlap for duplication.
- * The source and destination arrays must be at least as wide as num_cols.
- */
-{
-  register JSAMPROW inptr, outptr;
-#ifdef FMEMCOPY
-  register size_t count = (size_t) num_cols * SIZEOF(JSAMPLE);
-#else
-  register JDIMENSION count;
-#endif
-  register int row;
-
-  for (row = num_rows; row > 0; row--) {
-    inptr = *input_array++;
-    outptr = *output_array++;
-#ifdef FMEMCOPY
-    FMEMCOPY(outptr, inptr, count);
-#else
-    for (count = num_cols; count > 0; count--)
-      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-#endif
-  }
-}
-
-
-GLOBAL(void)
-jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
-		 JDIMENSION num_blocks)
-/* Copy a row of coefficient blocks from one place to another. */
-{
-#ifdef FMEMCOPY
-  FMEMCOPY(output_row, input_row, (size_t) num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
-#else
-  register JCOEFPTR inptr, outptr;
-  register long count;
-
-  inptr = (JCOEFPTR) input_row;
-  outptr = (JCOEFPTR) output_row;
-  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
-    *outptr++ = *inptr++;
-  }
-#endif
-}
diff --git a/dep/libjpeg/src/jversion.h b/dep/libjpeg/src/jversion.h
deleted file mode 100644
index df53ef5e5..000000000
--- a/dep/libjpeg/src/jversion.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * jversion.h
- *
- * Copyright (C) 1991-2024, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains software version identification.
- */
-
-
-#define JVERSION	"9f  14-Jan-2024"
-
-#define JCOPYRIGHT	"Copyright (C) 2024, Thomas G. Lane, Guido Vollbeding"
diff --git a/dep/libjpeg/src/transupp.c b/dep/libjpeg/src/transupp.c
deleted file mode 100644
index 6518936a0..000000000
--- a/dep/libjpeg/src/transupp.c
+++ /dev/null
@@ -1,2433 +0,0 @@
-/*
- * transupp.c
- *
- * Copyright (C) 1997-2023, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains image transformation routines and other utility code
- * used by the jpegtran sample application.  These are NOT part of the core
- * JPEG library.  But we keep these routines separate from jpegtran.c to
- * ease the task of maintaining jpegtran-like programs that have other user
- * interfaces.
- */
-
-/* Although this file really shouldn't have access to the library internals,
- * it's helpful to let it call jround_up() and jcopy_block_row().
- * Also, the (switchable) virtual memory adaptation code for
- * the drop feature has dependencies on library internals.
- */
-#define JPEG_INTERNALS
-
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "transupp.h"		/* My own external interface */
-#include <ctype.h>		/* to declare isdigit() */
-
-
-#if TRANSFORMS_SUPPORTED
-
-/*
- * Lossless image transformation routines.  These routines work on DCT
- * coefficient arrays and thus do not require any lossy decompression
- * or recompression of the image.
- * Thanks to Guido Vollbeding for the initial design and code of this feature,
- * and to Ben Jackson for introducing the cropping feature.
- *
- * Horizontal flipping is done in-place, using a single top-to-bottom
- * pass through the virtual source array.  It will thus be much the
- * fastest option for images larger than main memory.
- *
- * The other routines require a set of destination virtual arrays, so they
- * need twice as much memory as jpegtran normally does.  The destination
- * arrays are always written in normal scan order (top to bottom) because
- * the virtual array manager expects this.  The source arrays will be scanned
- * in the corresponding order, which means multiple passes through the source
- * arrays for most of the transforms.  That could result in much thrashing
- * if the image is larger than main memory.
- *
- * If cropping or trimming is involved, the destination arrays may be smaller
- * than the source arrays.  Note it is not possible to do horizontal flip
- * in-place when a nonzero Y crop offset is specified, since we'd have to move
- * data from one block row to another but the virtual array manager doesn't
- * guarantee we can touch more than one row at a time.  So in that case,
- * we have to use a separate destination array.
- *
- * Some notes about the operating environment of the individual transform
- * routines:
- * 1. Both the source and destination virtual arrays are allocated from the
- *    source JPEG object, and therefore should be manipulated by calling the
- *    source's memory manager.
- * 2. The destination's component count should be used.  It may be smaller
- *    than the source's when forcing to grayscale.
- * 3. Likewise the destination's sampling factors should be used.  When
- *    forcing to grayscale the destination's sampling factors will be all 1,
- *    and we may as well take that as the effective iMCU size.
- * 4. When "trim" is in effect, the destination's dimensions will be the
- *    trimmed values but the source's will be untrimmed.
- * 5. When "crop" is in effect, the destination's dimensions will be the
- *    cropped values but the source's will be uncropped.  Each transform
- *    routine is responsible for picking up source data starting at the
- *    correct X and Y offset for the crop region.  (The X and Y offsets
- *    passed to the transform routines are measured in iMCU blocks of the
- *    destination.)
- * 6. All the routines assume that the source and destination buffers are
- *    padded out to a full iMCU boundary.  This is true, although for the
- *    source buffer it is an undocumented property of jdcoefct.c.
- */
-
-
-/* Drop code may be used with or without virtual memory adaptation code.
- * This code has some dependencies on internal library behavior, so you
- * may choose to disable it.  For example, it doesn't make a difference
- * if you only use jmemnobs anyway.
- */
-#ifndef DROP_REQUEST_FROM_SRC
-#define DROP_REQUEST_FROM_SRC 1		/* 0 disables adaptation */
-#endif
-
-
-#if DROP_REQUEST_FROM_SRC
-/* Force jpeg_read_coefficients to request
- * the virtual coefficient arrays from
- * the source decompression object.
- */
-METHODDEF(jvirt_barray_ptr)
-drop_request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-			  JDIMENSION blocksperrow, JDIMENSION numrows,
-			  JDIMENSION maxaccess)
-{
-  j_common_ptr srcinfo = (j_common_ptr) cinfo->client_data;
-
-  return (*srcinfo->mem->request_virt_barray)
-	  (srcinfo, pool_id, pre_zero,
-	   blocksperrow, numrows, maxaccess);
-}
-
-
-/* Force jpeg_read_coefficients to return
- * after requesting and before accessing
- * the virtual coefficient arrays.
- */
-METHODDEF(int)
-drop_consume_input (j_decompress_ptr cinfo)
-{
-  return JPEG_SUSPENDED;
-}
-
-
-METHODDEF(void)
-drop_start_input_pass (j_decompress_ptr cinfo)
-{
-  cinfo->inputctl->consume_input = drop_consume_input;
-}
-
-
-LOCAL(void)
-drop_request_from_src (j_decompress_ptr dropinfo, j_decompress_ptr srcinfo)
-{
-  void *save_client_data;
-  JMETHOD(jvirt_barray_ptr, save_request_virt_barray,
-	  (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-	   JDIMENSION blocksperrow, JDIMENSION numrows, JDIMENSION maxaccess));
-  JMETHOD(void, save_start_input_pass, (j_decompress_ptr cinfo));
-
-  /* Set custom method pointers, save original pointers */
-  save_client_data = dropinfo->client_data;
-  dropinfo->client_data = (void *) srcinfo;
-  save_request_virt_barray = dropinfo->mem->request_virt_barray;
-  dropinfo->mem->request_virt_barray = drop_request_virt_barray;
-  save_start_input_pass = dropinfo->inputctl->start_input_pass;
-  dropinfo->inputctl->start_input_pass = drop_start_input_pass;
-
-  /* Execute only initialization part.
-   * Requested coefficient arrays will be realized later by the srcinfo object.
-   * Next call to the same function will then do the actual data reading.
-   * NB: since we request the coefficient arrays from another object,
-   * the inherent realization call is effectively a no-op.
-   */
-  (void) jpeg_read_coefficients(dropinfo);
-
-  /* Reset method pointers */
-  dropinfo->client_data = save_client_data;
-  dropinfo->mem->request_virt_barray = save_request_virt_barray;
-  dropinfo->inputctl->start_input_pass = save_start_input_pass;
-  /* Do input initialization for first scan now,
-   * which also resets the consume_input method.
-   */
-  (*save_start_input_pass)(dropinfo);
-}
-#endif /* DROP_REQUEST_FROM_SRC */
-
-
-LOCAL(void)
-dequant_comp (j_decompress_ptr cinfo, jpeg_component_info *compptr,
-	      jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
-{
-  JDIMENSION blk_x, blk_y;
-  int offset_y, k;
-  JQUANT_TBL *qtblptr;
-  JBLOCKARRAY buffer;
-  JBLOCKROW block;
-  JCOEFPTR ptr;
-
-  qtblptr = compptr->quant_table;
-  for (blk_y = 0; blk_y < compptr->height_in_blocks;
-       blk_y += compptr->v_samp_factor) {
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef_array, blk_y,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-      block = buffer[offset_y];
-      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
-	ptr = block[blk_x];
-	for (k = 0; k < DCTSIZE2; k++)
-	  if (qtblptr->quantval[k] != qtblptr1->quantval[k])
-	    ptr[k] *= qtblptr->quantval[k] / qtblptr1->quantval[k];
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-requant_comp (j_decompress_ptr cinfo, jpeg_component_info *compptr,
-	      jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
-{
-  JDIMENSION blk_x, blk_y;
-  int offset_y, k, temp, qval;
-  JQUANT_TBL *qtblptr;
-  JBLOCKARRAY buffer;
-  JBLOCKROW block;
-  JCOEFPTR ptr;
-
-  qtblptr = compptr->quant_table;
-  for (blk_y = 0; blk_y < compptr->height_in_blocks;
-       blk_y += compptr->v_samp_factor) {
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef_array, blk_y,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-      block = buffer[offset_y];
-      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
-	ptr = block[blk_x];
-	for (k = 0; k < DCTSIZE2; k++) {
-	  qval = qtblptr1->quantval[k];
-	  if (qval == 0) continue;
-	  temp = qtblptr->quantval[k];
-	  if (temp == qval) continue;
-	  temp *= ptr[k];
-	  /* The following quantization code is a copy from jcdctmgr.c */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b)	a /= b
-#else
-#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
-#endif
-	  if (temp < 0) {
-	    temp = -temp;
-	    temp += qval>>1;	/* for rounding */
-	    DIVIDE_BY(temp, qval);
-	    temp = -temp;
-	  } else {
-	    temp += qval>>1;	/* for rounding */
-	    DIVIDE_BY(temp, qval);
-	  }
-	  ptr[k] = (JCOEF) temp;
-	}
-      }
-    }
-  }
-}
-
-
-/* Calculate largest common denominator with Euclid's algorithm.
- */
-LOCAL(JCOEF)
-largest_common_denominator(JCOEF a, JCOEF b)
-{
-  JCOEF c;
-
-  while (b) {
-    c = a % b;
-    a = b;
-    b = c;
-  }
-
-  return a;
-}
-
-
-LOCAL(void)
-adjust_quant(j_decompress_ptr srcinfo, jvirt_barray_ptr *src_coef_arrays,
-	     j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
-	     boolean trim, j_compress_ptr dstinfo)
-{
-  jpeg_component_info *compptr1, *compptr2;
-  JQUANT_TBL *qtblptr1, *qtblptr2, *qtblptr3;
-  int ci, k;
-
-  for (ci = 0; ci < dstinfo->num_components &&
-	       ci < dropinfo->num_components; ci++) {
-    compptr1 = srcinfo->comp_info + ci;
-    compptr2 = dropinfo->comp_info + ci;
-    qtblptr1 = compptr1->quant_table;
-    qtblptr2 = compptr2->quant_table;
-    for (k = 0; k < DCTSIZE2; k++) {
-      if (qtblptr1->quantval[k] != qtblptr2->quantval[k]) {
-	if (trim)
-	  requant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr1);
-	else {
-	  qtblptr3 = dstinfo->quant_tbl_ptrs[compptr1->quant_tbl_no];
-	  for (k = 0; k < DCTSIZE2; k++)
-	    if (qtblptr1->quantval[k] != qtblptr2->quantval[k])
-	      qtblptr3->quantval[k] = largest_common_denominator
-		(qtblptr1->quantval[k], qtblptr2->quantval[k]);
-	  dequant_comp(srcinfo, compptr1, src_coef_arrays[ci], qtblptr3);
-	  dequant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr3);
-	}
-	break;
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_drop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	 jvirt_barray_ptr *src_coef_arrays,
-	 j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
-	 JDIMENSION drop_width, JDIMENSION drop_height)
-/* Drop.  If the dropinfo component number is smaller than the destination's,
- * we fill in the remaining components with zero.  This provides the feature
- * of dropping grayscale into (arbitrarily sampled) color images.
- */
-{
-  JDIMENSION comp_width, comp_height;
-  JDIMENSION blk_y, x_drop_blocks, y_drop_blocks;
-  int ci, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  jpeg_component_info *compptr;
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = drop_width * compptr->h_samp_factor;
-    comp_height = drop_height * compptr->v_samp_factor;
-    x_drop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_drop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (blk_y = 0; blk_y < comp_height; blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y + y_drop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (ci < dropinfo->num_components) {
-#if DROP_REQUEST_FROM_SRC
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, drop_coef_arrays[ci], blk_y,
-#else
-	src_buffer = (*dropinfo->mem->access_virt_barray)
-	  ((j_common_ptr) dropinfo, drop_coef_arrays[ci], blk_y,
-#endif
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-	for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	  jcopy_block_row(src_buffer[offset_y],
-			  dst_buffer[offset_y] + x_drop_blocks,
-			  comp_width);
-	}
-      } else {
-	for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	  FMEMZERO(dst_buffer[offset_y] + x_drop_blocks,
-		   comp_width * SIZEOF(JBLOCK));
-	} 	
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	 jvirt_barray_ptr *src_coef_arrays,
-	 jvirt_barray_ptr *dst_coef_arrays)
-/* Crop.  This is only used when no rotate/flip is requested with the crop. */
-{
-  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  jpeg_component_info *compptr;
-
-  /* We simply have to copy the right amount of data (the destination's
-   * image size) starting at the given X and Y offsets in the source.
-   */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			dst_buffer[offset_y],
-			compptr->width_in_blocks);
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_crop_ext_zero (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		  JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-		  jvirt_barray_ptr *src_coef_arrays,
-		  jvirt_barray_ptr *dst_coef_arrays)
-/* Crop.  This is only used when no rotate/flip is requested with the crop.
- * Extension: If the destination size is larger than the source, we fill in
- * the extra area with zero (neutral gray).  Note we also have to zero partial
- * iMCUs at the right and bottom edge of the source image area in this case.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
-  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (dstinfo->jpeg_height > srcinfo->output_height) {
-	if (dst_blk_y < y_crop_blocks ||
-	    dst_blk_y >= y_crop_blocks + comp_height) {
-	  for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	    FMEMZERO(dst_buffer[offset_y],
-		     compptr->width_in_blocks * SIZEOF(JBLOCK));
-	  }
-	  continue;
-	}
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y - y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (dstinfo->jpeg_width > srcinfo->output_width) {
-	  if (x_crop_blocks > 0) {
-	    FMEMZERO(dst_buffer[offset_y],
-		     x_crop_blocks * SIZEOF(JBLOCK));
-	  }
-	  jcopy_block_row(src_buffer[offset_y],
-			  dst_buffer[offset_y] + x_crop_blocks,
-			  comp_width);
-	  if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
-	    FMEMZERO(dst_buffer[offset_y] +
-		       x_crop_blocks + comp_width,
-		     (compptr->width_in_blocks -
-		       x_crop_blocks - comp_width) * SIZEOF(JBLOCK));
-	  }
-	} else {
-	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			  dst_buffer[offset_y],
-			  compptr->width_in_blocks);
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_crop_ext_flat (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		  JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-		  jvirt_barray_ptr *src_coef_arrays,
-		  jvirt_barray_ptr *dst_coef_arrays)
-/* Crop.  This is only used when no rotate/flip is requested with the crop.
- * Extension: The destination width is larger than the source and we fill in
- * the extra area with the DC of the adjacent block.  Note we also have to
- * fill partial iMCUs at the right and bottom edge of the source image area
- * in this case.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
-  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, offset_y;
-  JCOEF dc;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (dstinfo->jpeg_height > srcinfo->output_height) {
-	if (dst_blk_y < y_crop_blocks ||
-	    dst_blk_y >= y_crop_blocks + comp_height) {
-	  for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	    FMEMZERO(dst_buffer[offset_y],
-		     compptr->width_in_blocks * SIZEOF(JBLOCK));
-	  }
-	  continue;
-	}
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y - y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (x_crop_blocks > 0) {
-	  FMEMZERO(dst_buffer[offset_y],
-		   x_crop_blocks * SIZEOF(JBLOCK));
-	  dc = src_buffer[offset_y][0][0];
-	  for (dst_blk_x = 0; dst_blk_x < x_crop_blocks; dst_blk_x++) {
-	    dst_buffer[offset_y][dst_blk_x][0] = dc;
-	  }
-	}
-	jcopy_block_row(src_buffer[offset_y],
-			dst_buffer[offset_y] + x_crop_blocks,
-			comp_width);
-	if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
-	  FMEMZERO(dst_buffer[offset_y] +
-		     x_crop_blocks + comp_width,
-		   (compptr->width_in_blocks -
-		     x_crop_blocks - comp_width) * SIZEOF(JBLOCK));
-	  dc = src_buffer[offset_y][comp_width - 1][0];
-	  for (dst_blk_x = x_crop_blocks + comp_width;
-	       dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_buffer[offset_y][dst_blk_x][0] = dc;
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_crop_ext_reflect (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		     JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-		     jvirt_barray_ptr *src_coef_arrays,
-		     jvirt_barray_ptr *dst_coef_arrays)
-/* Crop.  This is only used when no rotate/flip is requested with the crop.
- * Extension: The destination width is larger than the source and we fill in
- * the extra area with repeated reflections of the source region.  Note we
- * also have to fill partial iMCUs at the right and bottom edge of the source
- * image area in this case.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, src_blk_x;
-  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, k, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (dstinfo->jpeg_height > srcinfo->output_height) {
-	if (dst_blk_y < y_crop_blocks ||
-	    dst_blk_y >= y_crop_blocks + comp_height) {
-	  for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	    FMEMZERO(dst_buffer[offset_y],
-		     compptr->width_in_blocks * SIZEOF(JBLOCK));
-	  }
-	  continue;
-	}
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y - y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	/* Copy source region */
-	jcopy_block_row(src_buffer[offset_y],
-			dst_buffer[offset_y] + x_crop_blocks,
-			comp_width);
-	if (x_crop_blocks > 0) {
-	  /* Reflect to left */
-	  dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks;
-	  for (dst_blk_x = x_crop_blocks; dst_blk_x > 0;) {
-	    src_row_ptr = dst_row_ptr;	   /* (re)set axis of reflection */
-	    for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
-		 src_blk_x--, dst_blk_x--) {
-	      dst_ptr = *--dst_row_ptr;	   /* destination goes left */
-	      src_ptr = *src_row_ptr++;	   /* source goes right */
-	      /* this unrolled loop doesn't need to know which row it's on... */
-	      for (k = 0; k < DCTSIZE2; k += 2) {
-		*dst_ptr++ = *src_ptr++;   /* copy even column */
-		*dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	      }
-	    }
-	  }
-	}
-	if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
-	  /* Reflect to right */
-	  dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks + comp_width;
-	  for (dst_blk_x = compptr->width_in_blocks - x_crop_blocks - comp_width;
-	       dst_blk_x > 0;) {
-	    src_row_ptr = dst_row_ptr;	   /* (re)set axis of reflection */
-	    for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
-		 src_blk_x--, dst_blk_x--) {
-	      dst_ptr = *dst_row_ptr++;	   /* destination goes right */
-	      src_ptr = *--src_row_ptr;	   /* source goes left */
-	      /* this unrolled loop doesn't need to know which row it's on... */
-	      for (k = 0; k < DCTSIZE2; k += 2) {
-		*dst_ptr++ = *src_ptr++;   /* copy even column */
-		*dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	      }
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_wipe (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	 jvirt_barray_ptr *src_coef_arrays,
-	 JDIMENSION drop_width, JDIMENSION drop_height)
-/* Wipe - drop content of specified area, fill with zero (neutral gray) */
-{
-  JDIMENSION x_wipe_blocks, wipe_width;
-  JDIMENSION y_wipe_blocks, wipe_bottom;
-  int ci, offset_y;
-  JBLOCKARRAY buffer;
-  jpeg_component_info *compptr;
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
-    wipe_width = drop_width * compptr->h_samp_factor;
-    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
-    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
-    for (; y_wipe_blocks < wipe_bottom;
-	 y_wipe_blocks += compptr->v_samp_factor) {
-      buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], y_wipe_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	FMEMZERO(buffer[offset_y] + x_wipe_blocks,
-		 wipe_width * SIZEOF(JBLOCK));
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flatten (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    JDIMENSION drop_width, JDIMENSION drop_height)
-/* Flatten - drop content of specified area, similar to wipe,
- * but fill with average of adjacent blocks, instead of zero.
- */
-{
-  JDIMENSION x_wipe_blocks, wipe_width, wipe_right;
-  JDIMENSION y_wipe_blocks, wipe_bottom, blk_x;
-  int ci, offset_y, dc_left_value, dc_right_value, average;
-  JBLOCKARRAY buffer;
-  jpeg_component_info *compptr;
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
-    wipe_width = drop_width * compptr->h_samp_factor;
-    wipe_right = wipe_width + x_wipe_blocks;
-    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
-    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
-    for (; y_wipe_blocks < wipe_bottom;
-	 y_wipe_blocks += compptr->v_samp_factor) {
-      buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], y_wipe_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	FMEMZERO(buffer[offset_y] + x_wipe_blocks,
-		 wipe_width * SIZEOF(JBLOCK));
-	if (x_wipe_blocks > 0) {
-	  dc_left_value = buffer[offset_y][x_wipe_blocks - 1][0];
-	  if (wipe_right < compptr->width_in_blocks) {
-	    dc_right_value = buffer[offset_y][wipe_right][0];
-	    average = (dc_left_value + dc_right_value) >> 1;
-	  } else {
-	    average = dc_left_value;
-	  }
-	} else if (wipe_right < compptr->width_in_blocks) {
-	  average = buffer[offset_y][wipe_right][0];
-	} else continue;
-	for (blk_x = x_wipe_blocks; blk_x < wipe_right; blk_x++) {
-	  buffer[offset_y][blk_x][0] = (JCOEF) average;
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_reflect (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    JDIMENSION drop_width, JDIMENSION drop_height)
-/* Reflect - drop content of specified area, similar to wipe, but
- * fill with repeated reflections of the outside area, instead of zero.
- * NB: y_crop_offset is assumed to be zero.
- */
-{
-  JDIMENSION x_wipe_blocks, wipe_width;
-  JDIMENSION y_wipe_blocks, wipe_bottom;
-  JDIMENSION src_blk_x, dst_blk_x;
-  int ci, k, offset_y;
-  JBLOCKARRAY buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
-    wipe_width = drop_width * compptr->h_samp_factor;
-    wipe_bottom = drop_height * compptr->v_samp_factor;
-    for (y_wipe_blocks = 0; y_wipe_blocks < wipe_bottom;
-	 y_wipe_blocks += compptr->v_samp_factor) {
-      buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], y_wipe_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (x_wipe_blocks > 0) {
-	  /* Reflect from left */
-	  dst_row_ptr = buffer[offset_y] + x_wipe_blocks;
-	  for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
-	    src_row_ptr = dst_row_ptr;	   /* (re)set axis of reflection */
-	    for (src_blk_x = x_wipe_blocks;
-		 src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
-	      dst_ptr = *dst_row_ptr++;	   /* destination goes right */
-	      src_ptr = *--src_row_ptr;	   /* source goes left */
-	      /* this unrolled loop doesn't need to know which row it's on... */
-	      for (k = 0; k < DCTSIZE2; k += 2) {
-		*dst_ptr++ = *src_ptr++;   /* copy even column */
-		*dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	      }
-	    }
-	  }
-	} else if (compptr->width_in_blocks > x_wipe_blocks + wipe_width) {
-	  /* Reflect from right */
-	  dst_row_ptr = buffer[offset_y] + x_wipe_blocks + wipe_width;
-	  for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
-	    src_row_ptr = dst_row_ptr;	   /* (re)set axis of reflection */
-	    src_blk_x = compptr->width_in_blocks - x_wipe_blocks - wipe_width;
-	    for (; src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
-	      dst_ptr = *--dst_row_ptr;	   /* destination goes left */
-	      src_ptr = *src_row_ptr++;	   /* source goes right */
-	      /* this unrolled loop doesn't need to know which row it's on... */
-	      for (k = 0; k < DCTSIZE2; k += 2) {
-		*dst_ptr++ = *src_ptr++;   /* copy even column */
-		*dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	      }
-	    }
-	  }
-	} else {
-	  FMEMZERO(buffer[offset_y] + x_wipe_blocks,
-		   wipe_width * SIZEOF(JBLOCK));
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		   JDIMENSION x_crop_offset,
-		   jvirt_barray_ptr *src_coef_arrays)
-/* Horizontal flip; done in-place, so no separate dest array is required.
- * NB: this only works when y_crop_offset is zero.
- */
-{
-  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
-  int ci, k, offset_y;
-  JBLOCKARRAY buffer;
-  JCOEFPTR ptr1, ptr2;
-  JCOEF temp1, temp2;
-  jpeg_component_info *compptr;
-
-  /* Horizontal mirroring of DCT blocks is accomplished by swapping
-   * pairs of blocks in-place.  Within a DCT block, we perform horizontal
-   * mirroring by changing the signs of odd-numbered columns.
-   * Partial iMCUs at the right edge are left untouched.
-   */
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    for (blk_y = 0; blk_y < compptr->height_in_blocks;
-	 blk_y += compptr->v_samp_factor) {
-      buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	/* Do the mirroring */
-	for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
-	  ptr1 = buffer[offset_y][blk_x];
-	  ptr2 = buffer[offset_y][comp_width - blk_x - 1];
-	  /* this unrolled loop doesn't need to know which row it's on... */
-	  for (k = 0; k < DCTSIZE2; k += 2) {
-	    temp1 = *ptr1;	/* swap even column */
-	    temp2 = *ptr2;
-	    *ptr1++ = temp2;
-	    *ptr2++ = temp1;
-	    temp1 = *ptr1;	/* swap odd column with sign change */
-	    temp2 = *ptr2;
-	    *ptr1++ = -temp2;
-	    *ptr2++ = -temp1;
-	  }
-	}
-	if (x_crop_blocks > 0) {
-	  /* Now left-justify the portion of the data to be kept.
-	   * We can't use a single jcopy_block_row() call because that routine
-	   * depends on memcpy(), whose behavior is unspecified for overlapping
-	   * source and destination areas.  Sigh.
-	   */
-	  for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
-	    jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
-			    buffer[offset_y] + blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* Horizontal flip in general cropping case */
-{
-  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, k, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Here we must output into a separate array because we can't touch
-   * different rows of a single virtual array simultaneously.  Otherwise,
-   * this is essentially the same as the routine above.
-   */
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	src_row_ptr = src_buffer[offset_y];
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Do the mirrorable blocks */
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	    /* this unrolled loop doesn't need to know which row it's on... */
-	    for (k = 0; k < DCTSIZE2; k += 2) {
-	      *dst_ptr++ = *src_ptr++;	 /* copy even column */
-	      *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	    }
-	  } else {
-	    /* Copy last partial block(s) verbatim */
-	    jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			    dst_row_ptr + dst_blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* Vertical flip */
-{
-  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* We output into a separate array because we can't touch different
-   * rows of the source virtual array simultaneously.  Otherwise, this
-   * is a pretty straightforward analog of horizontal flip.
-   * Within a DCT block, vertical mirroring is done by changing the signs
-   * of odd-numbered rows.
-   * Partial iMCUs at the bottom edge are copied verbatim.
-   */
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	/* Bottom-edge blocks will be copied verbatim. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  src_row_ptr += x_crop_blocks;
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	       dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      /* copy even row */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = *src_ptr++;
-	      /* copy odd row with sign change */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	} else {
-	  /* Just copy row verbatim. */
-	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			  dst_buffer[offset_y],
-			  compptr->width_in_blocks);
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	      JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	      jvirt_barray_ptr *src_coef_arrays,
-	      jvirt_barray_ptr *dst_coef_arrays)
-/* Transpose source into destination */
-{
-  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Transposing pixels within a block just requires transposing the
-   * DCT coefficients.
-   * Partial iMCUs at the edges require no special treatment; we simply
-   * process all the available DCT blocks for every component.
-   */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
-	    for (i = 0; i < DCTSIZE; i++)
-	      for (j = 0; j < DCTSIZE; j++)
-		dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* 90 degree rotation is equivalent to
- *   1. Transposing the image;
- *   2. Horizontal mirroring.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Because of the horizontal mirror step, we can't process partial iMCUs
-   * at the (output) right edge properly.  They just get transposed and
-   * not mirrored.
-   */
-  MCU_cols = srcinfo->output_height /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    /* Edge blocks are transposed but not mirrored. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		i++;
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
-/* 270 degree rotation is equivalent to
- *   1. Horizontal mirroring;
- *   2. Transposing the image.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Because of the horizontal mirror step, we can't process partial iMCUs
-   * at the (output) bottom edge properly.  They just get transposed and
-   * not mirrored.
-   */
-  MCU_rows = srcinfo->output_width /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[offset_x]
-		[comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++) {
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  j++;
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
-/* 180 degree rotation is equivalent to
- *   1. Vertical mirroring;
- *   2. Horizontal mirroring.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the vertically mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	/* Bottom-edge rows are only mirrored horizontally. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored both ways. */
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		/* For even row, negate every odd column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = *src_ptr++;
-		  *dst_ptr++ = - *src_ptr++;
-		}
-		/* For odd row, negate every even column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = - *src_ptr++;
-		  *dst_ptr++ = *src_ptr++;
-		}
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only mirrored vertically. */
-	      src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = *src_ptr++;
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = - *src_ptr++;
-	      }
-	    }
-	  }
-	} else {
-	  /* Remaining rows are just mirrored horizontally. */
-	  src_row_ptr = src_buffer[offset_y];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored. */
-	      dst_ptr = dst_row_ptr[dst_blk_x];
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE2; i += 2) {
-		*dst_ptr++ = *src_ptr++;
-		*dst_ptr++ = - *src_ptr++;
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only copied. */
-	      jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			      dst_row_ptr + dst_blk_x,
-			      (JDIMENSION) 1);
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	       JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	       jvirt_barray_ptr *src_coef_arrays,
-	       jvirt_barray_ptr *dst_coef_arrays)
-/* Transverse transpose is equivalent to
- *   1. 180 degree rotation;
- *   2. Transposition;
- * or
- *   1. Horizontal mirroring;
- *   2. Transposition;
- *   3. Horizontal mirroring.
- * These steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_height /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_width /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Block is within the mirrorable area. */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      } else {
-		/* Right-edge blocks are mirrored in y only */
-		src_ptr = src_buffer[offset_x]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      }
-	    } else {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Bottom-edge blocks are mirrored in x only */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      } else {
-		/* At lower right corner, just transpose, no mirroring */
-		src_ptr = src_buffer[offset_x]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++)
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	      }
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
- * Returns TRUE if valid integer found, FALSE if not.
- * *strptr is advanced over the digit string, and *result is set to its value.
- */
-
-LOCAL(boolean)
-jt_read_integer (const char ** strptr, JDIMENSION * result)
-{
-  const char * ptr = *strptr;
-  JDIMENSION val = 0;
-
-  for (; isdigit(*ptr); ptr++) {
-    val = val * 10 + (JDIMENSION) (*ptr - '0');
-  }
-  *result = val;
-  if (ptr == *strptr)
-    return FALSE;		/* oops, no digits */
-  *strptr = ptr;
-  return TRUE;
-}
-
-
-/* Parse a crop specification (written in X11 geometry style).
- * The routine returns TRUE if the spec string is valid, FALSE if not.
- *
- * The crop spec string should have the format
- *	<width>[{fr}]x<height>[{fr}]{+-}<xoffset>{+-}<yoffset>
- * where width, height, xoffset, and yoffset are unsigned integers.
- * Each of the elements can be omitted to indicate a default value.
- * (A weakness of this style is that it is not possible to omit xoffset
- * while specifying yoffset, since they look alike.)
- *
- * This code is loosely based on XParseGeometry from the X11 distribution.
- */
-
-GLOBAL(boolean)
-jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec)
-{
-  info->crop = FALSE;
-  info->crop_width_set = JCROP_UNSET;
-  info->crop_height_set = JCROP_UNSET;
-  info->crop_xoffset_set = JCROP_UNSET;
-  info->crop_yoffset_set = JCROP_UNSET;
-
-  if (isdigit(*spec)) {
-    /* fetch width */
-    if (! jt_read_integer(&spec, &info->crop_width))
-      return FALSE;
-    if (*spec == 'f' || *spec == 'F') {
-      spec++;
-      info->crop_width_set = JCROP_FORCE;
-    } else if (*spec == 'r' || *spec == 'R') {
-      spec++;
-      info->crop_width_set = JCROP_REFLECT;
-    } else
-      info->crop_width_set = JCROP_POS;
-  }
-  if (*spec == 'x' || *spec == 'X') {
-    /* fetch height */
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_height))
-      return FALSE;
-    if (*spec == 'f' || *spec == 'F') {
-      spec++;
-      info->crop_height_set = JCROP_FORCE;
-    } else if (*spec == 'r' || *spec == 'R') {
-      spec++;
-      info->crop_height_set = JCROP_REFLECT;
-    } else
-      info->crop_height_set = JCROP_POS;
-  }
-  if (*spec == '+' || *spec == '-') {
-    /* fetch xoffset */
-    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_xoffset))
-      return FALSE;
-  }
-  if (*spec == '+' || *spec == '-') {
-    /* fetch yoffset */
-    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_yoffset))
-      return FALSE;
-  }
-  /* We had better have gotten to the end of the string. */
-  if (*spec != '\0')
-    return FALSE;
-  info->crop = TRUE;
-  return TRUE;
-}
-
-
-/* Trim off any partial iMCUs on the indicated destination edge */
-
-LOCAL(void)
-trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width)
-{
-  JDIMENSION MCU_cols;
-
-  MCU_cols = info->output_width / info->iMCU_sample_width;
-  if (MCU_cols > 0 && info->x_crop_offset + MCU_cols ==
-      full_width / info->iMCU_sample_width)
-    info->output_width = MCU_cols * info->iMCU_sample_width;
-}
-
-LOCAL(void)
-trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height)
-{
-  JDIMENSION MCU_rows;
-
-  MCU_rows = info->output_height / info->iMCU_sample_height;
-  if (MCU_rows > 0 && info->y_crop_offset + MCU_rows ==
-      full_height / info->iMCU_sample_height)
-    info->output_height = MCU_rows * info->iMCU_sample_height;
-}
-
-
-/* Request any required workspace.
- *
- * This routine figures out the size that the output image will be
- * (which implies that all the transform parameters must be set before
- * it is called).
- *
- * We allocate the workspace virtual arrays from the source decompression
- * object, so that all the arrays (both the original data and the workspace)
- * will be taken into account while making memory management decisions.
- * Hence, this routine must be called after jpeg_read_header (which reads
- * the image dimensions) and before jpeg_read_coefficients (which realizes
- * the source's virtual arrays).
- *
- * This function returns FALSE right away if -perfect is given
- * and transformation is not perfect.  Otherwise returns TRUE.
- */
-
-GLOBAL(boolean)
-jtransform_request_workspace (j_decompress_ptr srcinfo,
-			      jpeg_transform_info *info)
-{
-  jvirt_barray_ptr *coef_arrays;
-  boolean need_workspace, transpose_it;
-  jpeg_component_info *compptr;
-  JDIMENSION xoffset, yoffset, dtemp;
-  JDIMENSION width_in_iMCUs, height_in_iMCUs;
-  JDIMENSION width_in_blocks, height_in_blocks;
-  int itemp, ci, h_samp_factor, v_samp_factor;
-
-  /* Determine number of components in output image */
-  if (info->force_grayscale &&
-      (srcinfo->jpeg_color_space == JCS_YCbCr ||
-       srcinfo->jpeg_color_space == JCS_BG_YCC) &&
-      srcinfo->num_components == 3)
-    /* We'll only process the first component */
-    info->num_components = 1;
-  else
-    /* Process all the components */
-    info->num_components = srcinfo->num_components;
-
-  /* Compute output image dimensions and related values. */
-  jpeg_core_output_dimensions(srcinfo);
-
-  /* Return right away if -perfect is given and transformation is not perfect.
-   */
-  if (info->perfect) {
-    if (info->num_components == 1) {
-      if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->min_DCT_h_scaled_size,
-	  srcinfo->min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
-    } else {
-      if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size,
-	  srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
-    }
-  }
-
-  /* If there is only one output component, force the iMCU size to be 1;
-   * else use the source iMCU size.  (This allows us to do the right thing
-   * when reducing color to grayscale, and also provides a handy way of
-   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
-   */
-  switch (info->transform) {
-  case JXFORM_TRANSPOSE:
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_90:
-  case JXFORM_ROT_270:
-    info->output_width = srcinfo->output_height;
-    info->output_height = srcinfo->output_width;
-    if (info->num_components == 1) {
-      info->iMCU_sample_width = srcinfo->min_DCT_v_scaled_size;
-      info->iMCU_sample_height = srcinfo->min_DCT_h_scaled_size;
-    } else {
-      info->iMCU_sample_width =
-	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
-      info->iMCU_sample_height =
-	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
-    }
-    break;
-  default:
-    info->output_width = srcinfo->output_width;
-    info->output_height = srcinfo->output_height;
-    if (info->num_components == 1) {
-      info->iMCU_sample_width = srcinfo->min_DCT_h_scaled_size;
-      info->iMCU_sample_height = srcinfo->min_DCT_v_scaled_size;
-    } else {
-      info->iMCU_sample_width =
-	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
-      info->iMCU_sample_height =
-	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
-    }
-  }
-
-  /* If cropping has been requested, compute the crop area's position and
-   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
-   */
-  if (info->crop) {
-    /* Insert default values for unset crop parameters */
-    if (info->crop_xoffset_set == JCROP_UNSET)
-      info->crop_xoffset = 0;	/* default to +0 */
-    if (info->crop_yoffset_set == JCROP_UNSET)
-      info->crop_yoffset = 0;	/* default to +0 */
-    if (info->crop_width_set == JCROP_UNSET) {
-      if (info->crop_xoffset >= info->output_width)
-	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      info->crop_width = info->output_width - info->crop_xoffset;
-    } else {
-      /* Check for crop extension */
-      if (info->crop_width > info->output_width) {
-	/* Crop extension does not work when transforming! */
-	if (info->transform != JXFORM_NONE ||
-	    info->crop_xoffset >= info->crop_width ||
-	    info->crop_xoffset > info->crop_width - info->output_width)
-	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      } else {
-	if (info->crop_xoffset >= info->output_width ||
-	    info->crop_width <= 0 ||
-	    info->crop_xoffset > info->output_width - info->crop_width)
-	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      }
-    }
-    if (info->crop_height_set == JCROP_UNSET) {
-      if (info->crop_yoffset >= info->output_height)
-	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      info->crop_height = info->output_height - info->crop_yoffset;
-    } else {
-      /* Check for crop extension */
-      if (info->crop_height > info->output_height) {
-	/* Crop extension does not work when transforming! */
-	if (info->transform != JXFORM_NONE ||
-	    info->crop_yoffset >= info->crop_height ||
-	    info->crop_yoffset > info->crop_height - info->output_height)
-	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      } else {
-	if (info->crop_yoffset >= info->output_height ||
-	    info->crop_height <= 0 ||
-	    info->crop_yoffset > info->output_height - info->crop_height)
-	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-      }
-    }
-    /* Convert negative crop offsets into regular offsets */
-    if (info->crop_xoffset_set != JCROP_NEG)
-      xoffset = info->crop_xoffset;
-    else if (info->crop_width > info->output_width) /* crop extension */
-      xoffset = info->crop_width - info->output_width - info->crop_xoffset;
-    else
-      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
-    if (info->crop_yoffset_set != JCROP_NEG)
-      yoffset = info->crop_yoffset;
-    else if (info->crop_height > info->output_height) /* crop extension */
-      yoffset = info->crop_height - info->output_height - info->crop_yoffset;
-    else
-      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
-    /* Now adjust so that upper left corner falls at an iMCU boundary */
-    switch (info->transform) {
-    case JXFORM_DROP:
-      /* Ensure the effective drop region will not exceed the requested */
-      itemp = info->iMCU_sample_width;
-      dtemp = itemp - 1 - ((xoffset + itemp - 1) % itemp);
-      xoffset += dtemp;
-      if (info->crop_width <= dtemp)
-	info->drop_width = 0;
-      else if (xoffset + info->crop_width - dtemp == info->output_width)
-	/* Matching right edge: include partial iMCU */
-	info->drop_width = (info->crop_width - dtemp + itemp - 1) / itemp;
-      else
-	info->drop_width = (info->crop_width - dtemp) / itemp;
-      itemp = info->iMCU_sample_height;
-      dtemp = itemp - 1 - ((yoffset + itemp - 1) % itemp);
-      yoffset += dtemp;
-      if (info->crop_height <= dtemp)
-	info->drop_height = 0;
-      else if (yoffset + info->crop_height - dtemp == info->output_height)
-	/* Matching bottom edge: include partial iMCU */
-	info->drop_height = (info->crop_height - dtemp + itemp - 1) / itemp;
-      else
-	info->drop_height = (info->crop_height - dtemp) / itemp;
-      /* Check if sampling factors match for dropping */
-      if (info->drop_width != 0 && info->drop_height != 0)
-	for (ci = 0; ci < info->num_components &&
-		     ci < info->drop_ptr->num_components; ci++) {
-	  if (info->drop_ptr->comp_info[ci].h_samp_factor *
-	      srcinfo->max_h_samp_factor !=
-	      srcinfo->comp_info[ci].h_samp_factor *
-	      info->drop_ptr->max_h_samp_factor)
-	    ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
-	      info->drop_ptr->comp_info[ci].h_samp_factor,
-	      info->drop_ptr->max_h_samp_factor,
-	      srcinfo->comp_info[ci].h_samp_factor,
-	      srcinfo->max_h_samp_factor, 'h');
-	  if (info->drop_ptr->comp_info[ci].v_samp_factor *
-	      srcinfo->max_v_samp_factor !=
-	      srcinfo->comp_info[ci].v_samp_factor *
-	      info->drop_ptr->max_v_samp_factor)
-	    ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
-	      info->drop_ptr->comp_info[ci].v_samp_factor,
-	      info->drop_ptr->max_v_samp_factor,
-	      srcinfo->comp_info[ci].v_samp_factor,
-	      srcinfo->max_v_samp_factor, 'v');
-	}
-      break;
-    case JXFORM_WIPE:
-      /* Ensure the effective wipe region will cover the requested */
-      info->drop_width = (JDIMENSION) jdiv_round_up
-	((long) (info->crop_width + (xoffset % info->iMCU_sample_width)),
-	 (long) info->iMCU_sample_width);
-      info->drop_height = (JDIMENSION) jdiv_round_up
-	((long) (info->crop_height + (yoffset % info->iMCU_sample_height)),
-	 (long) info->iMCU_sample_height);
-      break;
-    default:
-      /* Ensure the effective crop region will cover the requested */
-      if (info->crop_width_set == JCROP_FORCE ||
-	  info->crop_width > info->output_width)
-	info->output_width = info->crop_width;
-      else
-	info->output_width =
-	  info->crop_width + (xoffset % info->iMCU_sample_width);
-      if (info->crop_height_set == JCROP_FORCE ||
-	  info->crop_height > info->output_height)
-	info->output_height = info->crop_height;
-      else
-	info->output_height =
-	  info->crop_height + (yoffset % info->iMCU_sample_height);
-    }
-    /* Save x/y offsets measured in iMCUs */
-    info->x_crop_offset = xoffset / info->iMCU_sample_width;
-    info->y_crop_offset = yoffset / info->iMCU_sample_height;
-  } else {
-    info->x_crop_offset = 0;
-    info->y_crop_offset = 0;
-  }
-
-  /* Figure out whether we need workspace arrays,
-   * and if so whether they are transposed relative to the source.
-   */
-  need_workspace = FALSE;
-  transpose_it = FALSE;
-  switch (info->transform) {
-  case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0 ||
-	info->output_width > srcinfo->output_width ||
-	info->output_height > srcinfo->output_height)
-      need_workspace = TRUE;
-    /* No workspace needed if neither cropping nor transforming */
-    break;
-  case JXFORM_FLIP_H:
-    if (info->trim)
-      trim_right_edge(info, srcinfo->output_width);
-    if (info->y_crop_offset != 0)
-      need_workspace = TRUE;
-    /* do_flip_h_no_crop doesn't need a workspace array */
-    break;
-  case JXFORM_FLIP_V:
-    if (info->trim)
-      trim_bottom_edge(info, srcinfo->output_height);
-    /* Need workspace arrays having same dimensions as source image. */
-    need_workspace = TRUE;
-    break;
-  case JXFORM_TRANSPOSE:
-    /* transpose does NOT have to trim anything */
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_TRANSVERSE:
-    if (info->trim) {
-      trim_right_edge(info, srcinfo->output_height);
-      trim_bottom_edge(info, srcinfo->output_width);
-    }
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_ROT_90:
-    if (info->trim)
-      trim_right_edge(info, srcinfo->output_height);
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_ROT_180:
-    if (info->trim) {
-      trim_right_edge(info, srcinfo->output_width);
-      trim_bottom_edge(info, srcinfo->output_height);
-    }
-    /* Need workspace arrays having same dimensions as source image. */
-    need_workspace = TRUE;
-    break;
-  case JXFORM_ROT_270:
-    if (info->trim)
-      trim_bottom_edge(info, srcinfo->output_width);
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_WIPE:
-    break;
-  case JXFORM_DROP:
-#if DROP_REQUEST_FROM_SRC
-    drop_request_from_src(info->drop_ptr, srcinfo);
-#endif
-    break;
-  }
-
-  /* Allocate workspace if needed.
-   * Note that we allocate arrays padded out to the next iMCU boundary,
-   * so that transform routines need not worry about missing edge blocks.
-   */
-  if (need_workspace) {
-    coef_arrays = (jvirt_barray_ptr *) (*srcinfo->mem->alloc_small)
-      ((j_common_ptr) srcinfo, JPOOL_IMAGE,
-       SIZEOF(jvirt_barray_ptr) * info->num_components);
-    width_in_iMCUs = (JDIMENSION) jdiv_round_up
-      ((long) info->output_width, (long) info->iMCU_sample_width);
-    height_in_iMCUs = (JDIMENSION) jdiv_round_up
-      ((long) info->output_height, (long) info->iMCU_sample_height);
-    for (ci = 0; ci < info->num_components; ci++) {
-      compptr = srcinfo->comp_info + ci;
-      if (info->num_components == 1) {
-	/* we're going to force samp factors to 1x1 in this case */
-	h_samp_factor = v_samp_factor = 1;
-      } else if (transpose_it) {
-	h_samp_factor = compptr->v_samp_factor;
-	v_samp_factor = compptr->h_samp_factor;
-      } else {
-	h_samp_factor = compptr->h_samp_factor;
-	v_samp_factor = compptr->v_samp_factor;
-      }
-      width_in_blocks = width_in_iMCUs * h_samp_factor;
-      height_in_blocks = height_in_iMCUs * v_samp_factor;
-      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
-	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
-	 width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
-    }
-    info->workspace_coef_arrays = coef_arrays;
-  } else
-    info->workspace_coef_arrays = NULL;
-
-  return TRUE;
-}
-
-
-/* Transpose destination image parameters */
-
-LOCAL(void)
-transpose_critical_parameters (j_compress_ptr dstinfo)
-{
-  int tblno, i, j, ci, itemp;
-  jpeg_component_info *compptr;
-  JQUANT_TBL *qtblptr;
-  JDIMENSION jtemp;
-  UINT16 qtemp;
-
-  /* Transpose image dimensions */
-  jtemp = dstinfo->image_width;
-  dstinfo->image_width = dstinfo->image_height;
-  dstinfo->image_height = jtemp;
-  itemp = dstinfo->min_DCT_h_scaled_size;
-  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
-  dstinfo->min_DCT_v_scaled_size = itemp;
-
-  /* Transpose sampling factors */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    itemp = compptr->h_samp_factor;
-    compptr->h_samp_factor = compptr->v_samp_factor;
-    compptr->v_samp_factor = itemp;
-  }
-
-  /* Transpose quantization tables */
-  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
-    qtblptr = dstinfo->quant_tbl_ptrs[tblno];
-    if (qtblptr != NULL) {
-      for (i = 0; i < DCTSIZE; i++) {
-	for (j = 0; j < i; j++) {
-	  qtemp = qtblptr->quantval[i*DCTSIZE+j];
-	  qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
-	  qtblptr->quantval[j*DCTSIZE+i] = qtemp;
-	}
-      }
-    }
-  }
-}
-
-
-/* Adjust Exif image parameters.
- *
- * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
- */
-
-LOCAL(void)
-adjust_exif_parameters (JOCTET FAR * data, unsigned int length,
-			JDIMENSION new_width, JDIMENSION new_height)
-{
-  boolean is_motorola; /* Flag for byte order */
-  unsigned int number_of_tags, tagnum;
-  unsigned int firstoffset, offset;
-  JDIMENSION new_value;
-
-  if (length < 12) return; /* Length of an IFD entry */
-
-  /* Discover byte order */
-  if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49)
-    is_motorola = FALSE;
-  else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D)
-    is_motorola = TRUE;
-  else
-    return;
-
-  /* Check Tag Mark */
-  if (is_motorola) {
-    if (GETJOCTET(data[2]) != 0) return;
-    if (GETJOCTET(data[3]) != 0x2A) return;
-  } else {
-    if (GETJOCTET(data[3]) != 0) return;
-    if (GETJOCTET(data[2]) != 0x2A) return;
-  }
-
-  /* Get first IFD offset (offset to IFD0) */
-  if (is_motorola) {
-    if (GETJOCTET(data[4]) != 0) return;
-    if (GETJOCTET(data[5]) != 0) return;
-    firstoffset = GETJOCTET(data[6]);
-    firstoffset <<= 8;
-    firstoffset += GETJOCTET(data[7]);
-  } else {
-    if (GETJOCTET(data[7]) != 0) return;
-    if (GETJOCTET(data[6]) != 0) return;
-    firstoffset = GETJOCTET(data[5]);
-    firstoffset <<= 8;
-    firstoffset += GETJOCTET(data[4]);
-  }
-  if (firstoffset > length - 2) return; /* check end of data segment */
-
-  /* Get the number of directory entries contained in this IFD */
-  if (is_motorola) {
-    number_of_tags = GETJOCTET(data[firstoffset]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[firstoffset+1]);
-  } else {
-    number_of_tags = GETJOCTET(data[firstoffset+1]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[firstoffset]);
-  }
-  if (number_of_tags == 0) return;
-  firstoffset += 2;
-
-  /* Search for ExifSubIFD offset Tag in IFD0 */
-  for (;;) {
-    if (firstoffset > length - 12) return; /* check end of data segment */
-    /* Get Tag number */
-    if (is_motorola) {
-      tagnum = GETJOCTET(data[firstoffset]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[firstoffset+1]);
-    } else {
-      tagnum = GETJOCTET(data[firstoffset+1]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[firstoffset]);
-    }
-    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
-    if (--number_of_tags == 0) return;
-    firstoffset += 12;
-  }
-
-  /* Get the ExifSubIFD offset */
-  if (is_motorola) {
-    if (GETJOCTET(data[firstoffset+8]) != 0) return;
-    if (GETJOCTET(data[firstoffset+9]) != 0) return;
-    offset = GETJOCTET(data[firstoffset+10]);
-    offset <<= 8;
-    offset += GETJOCTET(data[firstoffset+11]);
-  } else {
-    if (GETJOCTET(data[firstoffset+11]) != 0) return;
-    if (GETJOCTET(data[firstoffset+10]) != 0) return;
-    offset = GETJOCTET(data[firstoffset+9]);
-    offset <<= 8;
-    offset += GETJOCTET(data[firstoffset+8]);
-  }
-  if (offset > length - 2) return; /* check end of data segment */
-
-  /* Get the number of directory entries contained in this SubIFD */
-  if (is_motorola) {
-    number_of_tags = GETJOCTET(data[offset]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[offset+1]);
-  } else {
-    number_of_tags = GETJOCTET(data[offset+1]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[offset]);
-  }
-  if (number_of_tags < 2) return;
-  offset += 2;
-
-  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
-  do {
-    if (offset > length - 12) return; /* check end of data segment */
-    /* Get Tag number */
-    if (is_motorola) {
-      tagnum = GETJOCTET(data[offset]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[offset+1]);
-    } else {
-      tagnum = GETJOCTET(data[offset+1]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[offset]);
-    }
-    if (tagnum == 0xA002 || tagnum == 0xA003) {
-      if (tagnum == 0xA002)
-	new_value = new_width; /* ExifImageWidth Tag */
-      else
-	new_value = new_height; /* ExifImageHeight Tag */
-      if (is_motorola) {
-	data[offset+2] = 0; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 4;
-	data[offset+4] = 0; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 1;
-	data[offset+8] = 0;
-	data[offset+9] = 0;
-	data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+11] = (JOCTET)(new_value & 0xFF);
-      } else {
-	data[offset+2] = 4; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 0;
-	data[offset+4] = 1; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 0;
-	data[offset+8] = (JOCTET)(new_value & 0xFF);
-	data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+10] = 0;
-	data[offset+11] = 0;
-      }
-    }
-    offset += 12;
-  } while (--number_of_tags);
-}
-
-
-/* Adjust output image parameters as needed.
- *
- * This must be called after jpeg_copy_critical_parameters()
- * and before jpeg_write_coefficients().
- *
- * The return value is the set of virtual coefficient arrays to be written
- * (either the ones allocated by jtransform_request_workspace, or the
- * original source data arrays).  The caller will need to pass this value
- * to jpeg_write_coefficients().
- */
-
-GLOBAL(jvirt_barray_ptr *)
-jtransform_adjust_parameters (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
-{
-  /* If force-to-grayscale is requested, adjust destination parameters */
-  if (info->force_grayscale) {
-    /* First, ensure we have YCC or grayscale data, and that the source's
-     * Y channel is full resolution.  (No reasonable person would make Y
-     * be less than full resolution, so actually coping with that case
-     * isn't worth extra code space.  But we check it to avoid crashing.)
-     */
-    if ((((dstinfo->jpeg_color_space == JCS_YCbCr ||
-	   dstinfo->jpeg_color_space == JCS_BG_YCC) &&
-	  dstinfo->num_components == 3) ||
-	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
-	  dstinfo->num_components == 1)) &&
-	srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
-	srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
-      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
-       * properly.  Among other things, it sets the target h_samp_factor &
-       * v_samp_factor to 1, which typically won't match the source.
-       * We have to preserve the source's quantization table number, however.
-       */
-      int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
-      jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
-      dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
-    } else {
-      /* Sorry, can't do it */
-      ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
-    }
-  } else if (info->num_components == 1) {
-    /* For a single-component source, we force the destination sampling factors
-     * to 1x1, with or without force_grayscale.  This is useful because some
-     * decoders choke on grayscale images with other sampling factors.
-     */
-    dstinfo->comp_info[0].h_samp_factor = 1;
-    dstinfo->comp_info[0].v_samp_factor = 1;
-  }
-
-  /* Correct the destination's image dimensions as necessary
-   * for rotate/flip, resize, and crop operations.
-   */
-  dstinfo->jpeg_width = info->output_width;
-  dstinfo->jpeg_height = info->output_height;
-
-  /* Transpose destination image parameters, adjust quantization */
-  switch (info->transform) {
-  case JXFORM_TRANSPOSE:
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_90:
-  case JXFORM_ROT_270:
-    transpose_critical_parameters(dstinfo);
-    break;
-  case JXFORM_DROP:
-    if (info->drop_width != 0 && info->drop_height != 0)
-      adjust_quant(srcinfo, src_coef_arrays,
-		   info->drop_ptr, info->drop_coef_arrays,
-		   info->trim, dstinfo);
-    break;
-  default:
-    break;
-  }
-
-  /* Adjust Exif properties */
-  if (srcinfo->marker_list != NULL &&
-      srcinfo->marker_list->marker == JPEG_APP0+1 &&
-      srcinfo->marker_list->data_length >= 6 &&
-      GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 &&
-      GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 &&
-      GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 &&
-      GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 &&
-      GETJOCTET(srcinfo->marker_list->data[4]) == 0 &&
-      GETJOCTET(srcinfo->marker_list->data[5]) == 0) {
-    /* Suppress output of JFIF marker */
-    dstinfo->write_JFIF_header = FALSE;
-    /* Adjust Exif image parameters */
-    if (dstinfo->jpeg_width != srcinfo->image_width ||
-	dstinfo->jpeg_height != srcinfo->image_height)
-      /* Align data segment to start of TIFF structure for parsing */
-      adjust_exif_parameters(srcinfo->marker_list->data + 6,
-	srcinfo->marker_list->data_length - 6,
-	dstinfo->jpeg_width, dstinfo->jpeg_height);
-  }
-
-  /* Return the appropriate output data set */
-  if (info->workspace_coef_arrays != NULL)
-    return info->workspace_coef_arrays;
-  return src_coef_arrays;
-}
-
-
-/* Execute the actual transformation, if any.
- *
- * This must be called *after* jpeg_write_coefficients, because it depends
- * on jpeg_write_coefficients to have computed subsidiary values such as
- * the per-component width and height fields in the destination object.
- *
- * Note that some transformations will modify the source data arrays!
- */
-
-GLOBAL(void)
-jtransform_execute_transform (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
-{
-  jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
-
-  /* Note: conditions tested here should match those in switch statement
-   * in jtransform_request_workspace()
-   */
-  switch (info->transform) {
-  case JXFORM_NONE:
-    if (info->output_width > srcinfo->output_width ||
-	info->output_height > srcinfo->output_height) {
-      if (info->output_width > srcinfo->output_width &&
-	  info->crop_width_set == JCROP_REFLECT)
-	do_crop_ext_reflect(srcinfo, dstinfo,
-			    info->x_crop_offset, info->y_crop_offset,
-			    src_coef_arrays, dst_coef_arrays);
-      else if (info->output_width > srcinfo->output_width &&
-	       info->crop_width_set == JCROP_FORCE)
-	do_crop_ext_flat(srcinfo, dstinfo,
-			 info->x_crop_offset, info->y_crop_offset,
-			 src_coef_arrays, dst_coef_arrays);
-      else
-	do_crop_ext_zero(srcinfo, dstinfo,
-			 info->x_crop_offset, info->y_crop_offset,
-			 src_coef_arrays, dst_coef_arrays);
-    } else if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
-      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_FLIP_H:
-    if (info->y_crop_offset != 0)
-      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		src_coef_arrays, dst_coef_arrays);
-    else
-      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
-			src_coef_arrays);
-    break;
-  case JXFORM_FLIP_V:
-    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_TRANSPOSE:
-    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		 src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_TRANSVERSE:
-    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		  src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_90:
-    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_180:
-    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_270:
-    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_WIPE:
-    if (info->crop_width_set == JCROP_REFLECT &&
-	info->y_crop_offset == 0 && info->drop_height ==
-	(JDIMENSION) jdiv_round_up
-	  ((long) info->output_height, (long) info->iMCU_sample_height) &&
-	(info->x_crop_offset == 0 ||
-	 info->x_crop_offset + info->drop_width ==
-	 (JDIMENSION) jdiv_round_up
-	   ((long) info->output_width, (long) info->iMCU_sample_width)))
-      do_reflect(srcinfo, dstinfo, info->x_crop_offset,
-		 src_coef_arrays, info->drop_width, info->drop_height);
-    else if (info->crop_width_set == JCROP_FORCE)
-      do_flatten(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		 src_coef_arrays, info->drop_width, info->drop_height);
-    else
-      do_wipe(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, info->drop_width, info->drop_height);
-    break;
-  case JXFORM_DROP:
-    if (info->drop_width != 0 && info->drop_height != 0)
-      do_drop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, info->drop_ptr, info->drop_coef_arrays,
-	      info->drop_width, info->drop_height);
-    break;
-  }
-}
-
-/* jtransform_perfect_transform
- *
- * Determine whether lossless transformation is perfectly
- * possible for a specified image and transformation.
- *
- * Inputs:
- *   image_width, image_height: source image dimensions.
- *   MCU_width, MCU_height: pixel dimensions of MCU.
- *   transform: transformation identifier.
- * Parameter sources from initialized jpeg_struct
- * (after reading source header):
- *   image_width = cinfo.image_width
- *   image_height = cinfo.image_height
- *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
- *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
- * Result:
- *   TRUE = perfect transformation possible
- *   FALSE = perfect transformation not possible
- *           (may use custom action then)
- */
-
-GLOBAL(boolean)
-jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
-			     int MCU_width, int MCU_height,
-			     JXFORM_CODE transform)
-{
-  boolean result = TRUE; /* initialize TRUE */
-
-  switch (transform) {
-  case JXFORM_FLIP_H:
-  case JXFORM_ROT_270:
-    if (image_width % (JDIMENSION) MCU_width)
-      result = FALSE;
-    break;
-  case JXFORM_FLIP_V:
-  case JXFORM_ROT_90:
-    if (image_height % (JDIMENSION) MCU_height)
-      result = FALSE;
-    break;
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_180:
-    if (image_width % (JDIMENSION) MCU_width)
-      result = FALSE;
-    if (image_height % (JDIMENSION) MCU_height)
-      result = FALSE;
-    break;
-  default:
-    break;
-  }
-
-  return result;
-}
-
-#endif /* TRANSFORMS_SUPPORTED */
-
-
-/* Setup decompression object to save desired markers in memory.
- * This must be called before jpeg_read_header() to have the desired effect.
- */
-
-GLOBAL(void)
-jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option)
-{
-#ifdef SAVE_MARKERS_SUPPORTED
-  int m;
-
-  /* Save comments except under NONE option */
-  if (option != JCOPYOPT_NONE) {
-    jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
-  }
-  /* Save all types of APPn markers iff ALL option */
-  if (option == JCOPYOPT_ALL) {
-    for (m = 0; m < 16; m++)
-      jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
-  }
-#endif /* SAVE_MARKERS_SUPPORTED */
-}
-
-/* Copy markers saved in the given source object to the destination object.
- * This should be called just after jpeg_start_compress() or
- * jpeg_write_coefficients().
- * Note that those routines will have written the SOI, and also the
- * JFIF APP0 or Adobe APP14 markers if selected.
- */
-
-GLOBAL(void)
-jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		       JCOPY_OPTION option)
-{
-  jpeg_saved_marker_ptr marker;
-
-  /* In the current implementation, we don't actually need to examine the
-   * option flag here; we just copy everything that got saved.
-   * But to avoid confusion, we do not output JFIF and Adobe APP14 markers
-   * if the encoder library already wrote one.
-   */
-  for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
-    if (dstinfo->write_JFIF_header &&
-	marker->marker == JPEG_APP0 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x4A &&
-	GETJOCTET(marker->data[1]) == 0x46 &&
-	GETJOCTET(marker->data[2]) == 0x49 &&
-	GETJOCTET(marker->data[3]) == 0x46 &&
-	GETJOCTET(marker->data[4]) == 0)
-      continue;			/* reject duplicate JFIF */
-    if (dstinfo->write_Adobe_marker &&
-	marker->marker == JPEG_APP0+14 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x41 &&
-	GETJOCTET(marker->data[1]) == 0x64 &&
-	GETJOCTET(marker->data[2]) == 0x6F &&
-	GETJOCTET(marker->data[3]) == 0x62 &&
-	GETJOCTET(marker->data[4]) == 0x65)
-      continue;			/* reject duplicate Adobe */
-#ifdef NEED_FAR_POINTERS
-    /* We could use jpeg_write_marker if the data weren't FAR... */
-    {
-      unsigned int i;
-      jpeg_write_m_header(dstinfo, marker->marker, marker->data_length);
-      for (i = 0; i < marker->data_length; i++)
-	jpeg_write_m_byte(dstinfo, marker->data[i]);
-    }
-#else
-    jpeg_write_marker(dstinfo, marker->marker,
-		      marker->data, marker->data_length);
-#endif
-  }
-}
diff --git a/dep/libjpeg/src/transupp.h b/dep/libjpeg/src/transupp.h
deleted file mode 100644
index a8ba16ad7..000000000
--- a/dep/libjpeg/src/transupp.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * transupp.h
- *
- * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains declarations for image transformation routines and
- * other utility code used by the jpegtran sample application.  These are
- * NOT part of the core JPEG library.  But we keep these routines separate
- * from jpegtran.c to ease the task of maintaining jpegtran-like programs
- * that have other user interfaces.
- *
- * NOTE: all the routines declared here have very specific requirements
- * about when they are to be executed during the reading and writing of the
- * source and destination files.  See the comments in transupp.c, or see
- * jpegtran.c for an example of correct usage.
- */
-
-/* If you happen not to want the image transform support, disable it here */
-#ifndef TRANSFORMS_SUPPORTED
-#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
-#endif
-
-/*
- * Although rotating and flipping data expressed as DCT coefficients is not
- * hard, there is an asymmetry in the JPEG format specification for images
- * whose dimensions aren't multiples of the iMCU size.  The right and bottom
- * image edges are padded out to the next iMCU boundary with junk data; but
- * no padding is possible at the top and left edges.  If we were to flip
- * the whole image including the pad data, then pad garbage would become
- * visible at the top and/or left, and real pixels would disappear into the
- * pad margins --- perhaps permanently, since encoders & decoders may not
- * bother to preserve DCT blocks that appear to be completely outside the
- * nominal image area.  So, we have to exclude any partial iMCUs from the
- * basic transformation.
- *
- * Transpose is the only transformation that can handle partial iMCUs at the
- * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
- * at the bottom, but leaves any partial iMCUs at the right edge untouched.
- * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
- * The other transforms are defined as combinations of these basic transforms
- * and process edge blocks in a way that preserves the equivalence.
- *
- * The "trim" option causes untransformable partial iMCUs to be dropped;
- * this is not strictly lossless, but it usually gives the best-looking
- * result for odd-size images.  Note that when this option is active,
- * the expected mathematical equivalences between the transforms may not hold.
- * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
- * followed by -rot 180 -trim trims both edges.)
- *
- * We also offer a lossless-crop option, which discards data outside a given
- * image region but losslessly preserves what is inside.  Like the rotate and
- * flip transforms, lossless crop is restricted by the current JPEG format: the
- * upper left corner of the selected region must fall on an iMCU boundary.  If
- * this does not hold for the given crop parameters, we silently move the upper
- * left corner up and/or left to make it so, simultaneously increasing the
- * region dimensions to keep the lower right crop corner unchanged.  (Thus, the
- * output image covers at least the requested region, but may cover more.)
- * The adjustment of the region dimensions may be optionally disabled.
- *
- * A complementary lossless-wipe option is provided to discard (gray out) data
- * inside a given image region while losslessly preserving what is outside.
- * Another option is lossless-drop, which replaces data at a given image
- * position by another image.  Both source images must have the same
- * subsampling values.  It is best if they also have the same quantization,
- * otherwise quantization adaption occurs.  The trim option can be used with
- * the drop option to requantize the drop file to the source file.
- *
- * We also provide a lossless-resize option, which is kind of a lossless-crop
- * operation in the DCT coefficient block domain - it discards higher-order
- * coefficients and losslessly preserves lower-order coefficients of a
- * sub-block.
- *
- * Rotate/flip transform, resize, and crop can be requested together in a
- * single invocation.  The crop is applied last --- that is, the crop region
- * is specified in terms of the destination image after transform/resize.
- *
- * We also offer a "force to grayscale" option, which simply discards the
- * chrominance channels of a YCbCr image.  This is lossless in the sense that
- * the luminance channel is preserved exactly.  It's not the same kind of
- * thing as the rotate/flip transformations, but it's convenient to handle it
- * as part of this package, mainly because the transformation routines have to
- * be aware of the option to know how many components to work on.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_parse_crop_spec	jTrParCrop
-#define jtransform_request_workspace	jTrRequest
-#define jtransform_adjust_parameters	jTrAdjust
-#define jtransform_execute_transform	jTrExec
-#define jtransform_perfect_transform	jTrPerfect
-#define jcopy_markers_setup		jCMrkSetup
-#define jcopy_markers_execute		jCMrkExec
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * Codes for supported types of image transformations.
- */
-
-typedef enum {
-	JXFORM_NONE,		/* no transformation */
-	JXFORM_FLIP_H,		/* horizontal flip */
-	JXFORM_FLIP_V,		/* vertical flip */
-	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
-	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
-	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
-	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270,		/* 270-degree clockwise (or 90 ccw) */
-	JXFORM_WIPE,		/* wipe */
-	JXFORM_DROP		/* drop */
-} JXFORM_CODE;
-
-/*
- * Codes for crop parameters, which can individually be unspecified,
- * positive or negative for xoffset or yoffset,
- * positive or force or reflect for width or height.
- */
-
-typedef enum {
-	JCROP_UNSET,
-	JCROP_POS,
-	JCROP_NEG,
-	JCROP_FORCE,
-	JCROP_REFLECT
-} JCROP_CODE;
-
-/*
- * Transform parameters struct.
- * NB: application must not change any elements of this struct after
- * calling jtransform_request_workspace.
- */
-
-typedef struct {
-  /* Options: set by caller */
-  JXFORM_CODE transform;	/* image transform operator */
-  boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
-  boolean trim;			/* if TRUE, trim partial MCUs as needed */
-  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop or wipe source image, or drop */
-
-  /* Crop parameters: application need not set these unless crop is TRUE.
-   * These can be filled in by jtransform_parse_crop_spec().
-   */
-  JDIMENSION crop_width;	/* Width of selected region */
-  JCROP_CODE crop_width_set;	/* (force disables adjustment) */
-  JDIMENSION crop_height;	/* Height of selected region */
-  JCROP_CODE crop_height_set;	/* (force disables adjustment) */
-  JDIMENSION crop_xoffset;	/* X offset of selected region */
-  JCROP_CODE crop_xoffset_set;	/* (negative measures from right edge) */
-  JDIMENSION crop_yoffset;	/* Y offset of selected region */
-  JCROP_CODE crop_yoffset_set;	/* (negative measures from bottom edge) */
-
-  /* Drop parameters: set by caller for drop request */
-  j_decompress_ptr drop_ptr;
-  jvirt_barray_ptr * drop_coef_arrays;
-
-  /* Internal workspace: caller should not touch these */
-  int num_components;		/* # of components in workspace */
-  jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
-  JDIMENSION output_width;	/* cropped destination dimensions */
-  JDIMENSION output_height;
-  JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
-  JDIMENSION y_crop_offset;
-  JDIMENSION drop_width;	/* drop/wipe dimensions measured in iMCUs */
-  JDIMENSION drop_height;
-  int iMCU_sample_width;	/* destination iMCU size */
-  int iMCU_sample_height;
-} jpeg_transform_info;
-
-
-#if TRANSFORMS_SUPPORTED
-
-/* Parse a crop specification (written in X11 geometry style) */
-EXTERN(boolean) jtransform_parse_crop_spec
-	JPP((jpeg_transform_info *info, const char *spec));
-/* Request any required workspace */
-EXTERN(boolean) jtransform_request_workspace
-	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
-/* Adjust output image parameters */
-EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Execute the actual transformation, if any */
-EXTERN(void) jtransform_execute_transform
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Determine whether lossless transformation is perfectly
- * possible for a specified image and transformation.
- */
-EXTERN(boolean) jtransform_perfect_transform
-	JPP((JDIMENSION image_width, JDIMENSION image_height,
-	     int MCU_width, int MCU_height,
-	     JXFORM_CODE transform));
-
-/* jtransform_execute_transform used to be called
- * jtransform_execute_transformation, but some compilers complain about
- * routine names that long.  This macro is here to avoid breaking any
- * old source code that uses the original name...
- */
-#define jtransform_execute_transformation	jtransform_execute_transform
-
-#endif /* TRANSFORMS_SUPPORTED */
-
-
-/*
- * Support for copying optional markers from source to destination file.
- */
-
-typedef enum {
-	JCOPYOPT_NONE,		/* copy no optional markers */
-	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
-	JCOPYOPT_ALL		/* copy all optional markers */
-} JCOPY_OPTION;
-
-#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
-
-/* Setup decompression object to save desired markers in memory */
-EXTERN(void) jcopy_markers_setup
-	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
-/* Copy markers saved in the given source object to the destination object */
-EXTERN(void) jcopy_markers_execute
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     JCOPY_OPTION option));
diff --git a/dep/libpng/ANNOUNCE b/dep/libpng/ANNOUNCE
deleted file mode 100644
index bc147adb7..000000000
--- a/dep/libpng/ANNOUNCE
+++ /dev/null
@@ -1,63 +0,0 @@
-libpng 1.6.43 - February 23, 2024
-=================================
-
-This is a public release of libpng, intended for use in production code.
-
-
-Files available for download
-----------------------------
-
-Source files with LF line endings (for Unix/Linux):
-
- * libpng-1.6.43.tar.xz (LZMA-compressed, recommended)
- * libpng-1.6.43.tar.gz (deflate-compressed)
-
-Source files with CRLF line endings (for Windows):
-
- * lpng1643.7z (LZMA-compressed, recommended)
- * lpng1643.zip (deflate-compressed)
-
-Other information:
-
- * README.md
- * LICENSE.md
- * AUTHORS.md
- * TRADEMARK.md
-
-
-Changes from version 1.6.42 to version 1.6.43
----------------------------------------------
-
- * Fixed the row width check in png_check_IHDR().
-   This corrected a bug that was specific to the 16-bit platforms,
-   and removed a spurious compiler warning from the 64-bit builds.
-   (Reported by Jacek Caban; fixed by John Bowler)
- * Added eXIf chunk support to the push-mode reader in pngpread.c.
-   (Contributed by Chris Blume)
- * Added contrib/pngexif for the benefit of the users who would like
-   to inspect the content of eXIf chunks.
- * Added contrib/conftest/basic.dfa, a basic build-time configuration.
-   (Contributed by John Bowler)
- * Fixed a preprocessor condition in pngread.c that broke build-time
-   configurations like contrib/conftest/pngcp.dfa.
-   (Contributed by John Bowler)
- * Added CMake build support for LoongArch LSX.
-   (Contributed by GuXiWei)
- * Fixed a CMake build error that occurred under a peculiar state of the
-   dependency tree. This was a regression introduced in libpng-1.6.41.
-   (Contributed by Dan Rosser)
- * Marked the installed libpng headers as system headers in CMake.
-   (Contributed by Benjamin Buch)
- * Updated the build support for RISCOS.
-   (Contributed by Cameron Cawley)
- * Updated the makefiles to allow cross-platform builds to initialize
-   conventional make variables like AR and ARFLAGS.
- * Added various improvements to the CI scripts in areas like version
-   consistency verification and text linting.
- * Added version consistency verification to pngtest.c also.
-
-
-Send comments/corrections/commendations to png-mng-implement at lists.sf.net.
-Subscription is required; visit
-https://lists.sourceforge.net/lists/listinfo/png-mng-implement
-to subscribe.
diff --git a/dep/libpng/AUTHORS b/dep/libpng/AUTHORS
deleted file mode 100644
index 544341694..000000000
--- a/dep/libpng/AUTHORS
+++ /dev/null
@@ -1,61 +0,0 @@
-PNG REFERENCE LIBRARY AUTHORS
-=============================
-
-This is the list of PNG Reference Library ("libpng") Contributing
-Authors, for copyright and licensing purposes.
-
- * Adam Richter
- * Andreas Dilger
- * Chris Blume
- * Cosmin Truta
- * Dave Martindale
- * Eric S. Raymond
- * Gilles Vollant
- * Glenn Randers-Pehrson
- * Greg Roelofs
- * Guy Eric Schalnat
- * James Yu
- * John Bowler
- * Kevin Bracey
- * Magnus Holmgren
- * Mandar Sahastrabuddhe
- * Mans Rullgard
- * Matt Sarett
- * Mike Klein
- * Pascal Massimino
- * Paul Schmidt
- * Philippe Antoine
- * Qiang Zhou
- * Sam Bushell
- * Samuel Williams
- * Simon-Pierre Cadieux
- * Tim Wegner
- * Tom Lane
- * Tom Tanner
- * Vadim Barkov
- * Willem van Schaik
- * Zhijie Liang
- * Apple Inc.
-    - Zixu Wang (王子旭)
- * Arm Holdings
-    - Richard Townsend
- * Google Inc.
-    - Dan Field
-    - Leon Scroggins III
-    - Matt Sarett
-    - Mike Klein
-    - Sami Boukortt
-    - Wan-Teh Chang
- * Loongson Technology Corporation Ltd.
-    - GuXiWei (顾希伟)
-    - JinBo (金波)
-    - ZhangLixia (张利霞)
-
-The build projects, the build scripts, the test scripts, and other
-files in the "projects", "scripts" and "tests" directories, have
-other copyright owners, but are released under the libpng license.
-
-Some files in the "ci" and "contrib" directories, as well as some
-of the tools-generated files that are distributed with libpng, have
-other copyright owners, and are released under other open source
-licenses.
diff --git a/dep/libpng/CHANGES b/dep/libpng/CHANGES
deleted file mode 100644
index 441b57ecf..000000000
--- a/dep/libpng/CHANGES
+++ /dev/null
@@ -1,6202 +0,0 @@
-CHANGES - changes for libpng
-
-version 0.1 [March 29, 1995]
-  initial work-in-progress release
-
-version 0.2 [April 1, 1995]
-  added reader into png.h
-  fixed small problems in stub file
-
-version 0.3 [April 8, 1995]
-  added pull reader
-  split up pngwrite.c to several files
-  added pnglib.txt
-  added example.c
-  cleaned up writer, adding a few new transformations
-  fixed some bugs in writer
-  interfaced with zlib 0.5
-  added K&R support
-  added check for 64 KB blocks for 16 bit machines
-
-version 0.4 [April 26, 1995]
-  cleaned up code and commented code
-  simplified time handling into png_time
-  created png_color_16 and png_color_8 to handle color needs
-  cleaned up color type defines
-  fixed various bugs
-  made various names more consistent
-  interfaced with zlib 0.71
-  cleaned up zTXt reader and writer (using zlib's Reset functions)
-  split transformations into pngrtran.c and pngwtran.c
-
-version 0.5 [April 30, 1995]
-  interfaced with zlib 0.8
-  fixed many reading and writing bugs
-  saved using 3 spaces instead of tabs
-
-version 0.6 [May 1, 1995]
-  first beta release
-  added png_large_malloc() and png_large_free()
-  added png_size_t
-  cleaned up some compiler warnings
-  added png_start_read_image()
-
-version 0.7 [June 24, 1995]
-  cleaned up lots of bugs
-  finished dithering and other stuff
-  added test program
-  changed name from pnglib to libpng
-
-version 0.71 [June 26, 1995]
-  changed pngtest.png for zlib 0.93
-  fixed error in libpng.txt and example.c
-
-version 0.8 [August 20, 1995]
-  cleaned up some bugs
-  added png_set_filler()
-  split up pngstub.c into pngmem.c, pngio.c, and pngerror.c
-  added #define's to remove unwanted code
-  moved png_info_init() to png.c
-  added old_size into png_realloc()
-  added functions to manually set filtering and compression info
-  changed compression parameters based on image type
-  optimized filter selection code
-  added version info
-  changed external functions passing floats to doubles (k&r problems?)
-  put all the configurable stuff in pngconf.h
-  enabled png_set_shift to work with paletted images on read
-  added png_read_update_info() - updates info structure with transformations
-
-Version 0.81 [August, 1995]
-  incorporated Tim Wegner's medium model code (thanks, Tim)
-
-Version 0.82 [September, 1995]
-  [unspecified changes]
-
-Version 0.85 [December, 1995]
-  added more medium model code (almost everything's a far)
-  added i/o, error, and memory callback functions
-  fixed some bugs (16-bit, 4-bit interlaced, etc.)
-  added first run progressive reader (barely tested)
-
-Version 0.86 [January, 1996]
-  fixed bugs
-  improved documentation
-
-Version 0.87 [January, 1996]
-  fixed medium model bugs
-  fixed other bugs introduced in 0.85 and 0.86
-  added some minor documentation
-
-Version 0.88 [January, 1996]
-  fixed progressive bugs
-  replaced tabs with spaces
-  cleaned up documentation
-  added callbacks for read/write and warning/error functions
-
-Version 0.89 [June 5, 1996]
-  Added new initialization API to make libpng work better with shared libs
-    we now have png_create_read_struct(), png_create_write_struct(),
-    png_create_info_struct(), png_destroy_read_struct(), and
-    png_destroy_write_struct() instead of the separate calls to
-    malloc and png_read_init(), png_info_init(), and png_write_init()
-  Changed warning/error callback functions to fix bug - this means you
-    should use the new initialization API if you were using the old
-    png_set_message_fn() calls, and that the old API no longer exists
-    so that people are aware that they need to change their code
-  Changed filter selection API to allow selection of multiple filters
-    since it didn't work in previous versions of libpng anyways
-  Optimized filter selection code
-  Fixed png_set_background() to allow using an arbitrary RGB color for
-    paletted images
-  Fixed gamma and background correction for paletted images, so
-    png_correct_palette is not needed unless you are correcting an
-    external palette (you will need to #define PNG_CORRECT_PALETTE_SUPPORTED
-    in pngconf.h) - if nobody uses this, it may disappear in the future.
-  Fixed bug with Borland 64K memory allocation (Alexander Lehmann)
-  Fixed bug in interlace handling (Smarasderagd, I think)
-  Added more error checking for writing and image to reduce invalid files
-  Separated read and write functions so that they won't both be linked
-    into a binary when only reading or writing functionality is used
-  New pngtest image also has interlacing and zTXt
-  Updated documentation to reflect new API
-
-Version 0.89c [June 17, 1996]
-  Bug fixes.
-
-Version 0.90 [January, 1997]
-  Made CRC errors/warnings on critical and ancillary chunks configurable
-  libpng will use the zlib CRC routines by (compile-time) default
-  Changed DOS small/medium model memory support - needs zlib 1.04 (Tim Wegner)
-  Added external C++ wrapper statements to png.h (Gilles Dauphin)
-  Allow PNG file to be read when some or all of file signature has already
-    been read from the beginning of the stream.  ****This affects the size
-    of info_struct and invalidates all programs that use a shared libpng****
-  Fixed png_filler() declarations
-  Fixed? background color conversions
-  Fixed order of error function pointers to match documentation
-  Current chunk name is now available in png_struct to reduce the number
-    of nearly identical error messages (will simplify multi-lingual
-    support when available)
-  Try to get ready for unknown-chunk callback functions:
-    - previously read critical chunks are flagged, so the chunk handling
-      routines can determine if the chunk is in the right place
-    - all chunk handling routines have the same prototypes, so we will
-      be able to handle all chunks via a callback mechanism
-  Try to fix Linux "setjmp" buffer size problems
-  Removed png_large_malloc, png_large_free, and png_realloc functions.
-
-Version 0.95 [March, 1997]
-  Fixed bug in pngwutil.c allocating "up_row" twice and "avg_row" never
-  Fixed bug in PNG file signature compares when start != 0
-  Changed parameter type of png_set_filler(...filler...) from png_byte
-    to png_uint_32
-  Added test for MACOS to ensure that both math.h and fp.h are not #included
-  Added macros for libpng to be compiled as a Windows DLL (Andreas Kupries)
-  Added "packswap" transformation, which changes the endianness of
-    packed-pixel bytes (Kevin Bracey)
-  Added "strip_alpha" transformation, which removes the alpha channel of
-    input images without using it (not necessarily a good idea)
-  Added "swap_alpha" transformation, which puts the alpha channel in front
-    of the color bytes instead of after
-  Removed all implicit variable tests which assume NULL == 0 (I think)
-  Changed several variables to "png_size_t" to show 16/32-bit limitations
-  Added new pCAL chunk read/write support
-  Added experimental filter selection weighting (Greg Roelofs)
-  Removed old png_set_rgbx() and png_set_xrgb() functions that have been
-    obsolete for about 2 years now (use png_set_filler() instead)
-  Added macros to read 16- and 32-bit ints directly from buffer, to be
-    used only on those systems that support it (namely PowerPC and 680x0)
-    With some testing, this may become the default for MACOS/PPC systems.
-  Only calculate CRC on data if we are going to use it
-  Added macros for zTXt compression type PNG_zTXt_COMPRESSION_???
-  Added macros for simple libpng debugging output selectable at compile time
-  Removed PNG_READ_END_MODE in progressive reader (Smarasderagd)
-  More description of info_struct in libpng.txt and png.h
-  More instructions in example.c
-  More chunk types tested in pngtest.c
-  Renamed pngrcb.c to pngset.c, and all png_read_<chunk> functions to be
-    png_set_<chunk>.  We now have corresponding png_get_<chunk>
-    functions in pngget.c to get information in info_ptr.  This isolates
-    the application from the internal organization of png_info_struct
-    (good for shared library implementations).
-
-Version 0.96 [May, 1997]
-  Fixed serious bug with < 8bpp images introduced in 0.95
-  Fixed 256-color transparency bug (Greg Roelofs)
-  Fixed up documentation (Greg Roelofs, Laszlo Nyul)
-  Fixed "error" in pngconf.h for Linux setjmp() behavior
-  Fixed DOS medium model support (Tim Wegner)
-  Fixed png_check_keyword() for case with error in static string text
-  Added read of CRC after IEND chunk for embedded PNGs (Laszlo Nyul)
-  Added typecasts to quiet compiler errors
-  Added more debugging info
-
-Version 0.97 [January, 1998]
-  Removed PNG_USE_OWN_CRC capability
-  Relocated png_set_crc_action from pngrutil.c to pngrtran.c
-  Fixed typecasts of "new_key", etc. (Andreas Dilger)
-  Added RFC 1152 [sic] date support
-  Fixed bug in gamma handling of 4-bit grayscale
-  Added 2-bit grayscale gamma handling (Glenn R-P)
-  Added more typecasts. 65536L becomes (png_uint_32)65536L, etc. (Glenn R-P)
-  Minor corrections in libpng.txt
-  Added simple sRGB support (Glenn R-P)
-  Easier conditional compiling, e.g.,
-    define PNG_READ/WRITE_NOT_FULLY_SUPPORTED;
-    all configurable options can be selected from command line instead
-    of having to edit pngconf.h (Glenn R-P)
-  Fixed memory leak in pngwrite.c (free info_ptr->text) (Glenn R-P)
-  Added more conditions for png_do_background, to avoid changing
-    black pixels to background when a background is supplied and
-    no pixels are transparent
-  Repaired PNG_NO_STDIO behavior
-  Tested NODIV support and made it default behavior (Greg Roelofs)
-  Added "-m" option and PNGTEST_DEBUG_MEMORY to pngtest (John Bowler)
-  Regularized version numbering scheme and bumped shared-library major
-    version number to 2 to avoid problems with libpng 0.89 apps
-    (Greg Roelofs)
-
-Version 0.98 [January, 1998]
-  Cleaned up some typos in libpng.txt and in code documentation
-  Fixed memory leaks in pCAL chunk processing (Glenn R-P and John Bowler)
-  Cosmetic change "display_gamma" to "screen_gamma" in pngrtran.c
-  Changed recommendation about file_gamma for PC images to .51 from .45,
-    in example.c and libpng.txt, added comments to distinguish between
-    screen_gamma, viewing_gamma, and display_gamma.
-  Changed all references to RFC1152 to read RFC1123 and changed the
-    PNG_TIME_RFC1152_SUPPORTED macro to PNG_TIME_RFC1123_SUPPORTED
-  Added png_invert_alpha capability (Glenn R-P -- suggestion by Jon Vincent)
-  Changed srgb_intent from png_byte to int to avoid compiler bugs
-
-Version 0.99 [January 30, 1998]
-  Free info_ptr->text instead of end_info_ptr->text in pngread.c (John Bowler)
-  Fixed a longstanding "packswap" bug in pngtrans.c
-  Fixed some inconsistencies in pngconf.h that prevented compiling with
-    PNG_READ_GAMMA_SUPPORTED and PNG_READ_hIST_SUPPORTED undefined
-  Fixed some typos and made other minor rearrangement of libpng.txt (Andreas)
-  Changed recommendation about file_gamma for PC images to .50 from .51 in
-    example.c and libpng.txt, and changed file_gamma for sRGB images to .45
-  Added a number of functions to access information from the png structure
-    png_get_image_height(), etc. (Glenn R-P, suggestion by Brad Pettit)
-  Added TARGET_MACOS similar to zlib-1.0.8
-  Define PNG_ALWAYS_EXTERN when __MWERKS__ && WIN32 are defined
-  Added type casting to all png_malloc() function calls
-
-Version 0.99a [January 31, 1998]
-  Added type casts and parentheses to all returns that return a value.(Tim W.)
-
-Version 0.99b [February 4, 1998]
-  Added type cast png_uint_32 on malloc function calls where needed.
-  Changed type of num_hist from png_uint_32 to int (same as num_palette).
-  Added checks for rowbytes overflow, in case png_size_t is less than 32 bits.
-  Renamed makefile.elf to makefile.lnx.
-
-Version 0.99c [February 7, 1998]
-  More type casting.  Removed erroneous overflow test in pngmem.c.
-  Added png_buffered_memcpy() and png_buffered_memset(), apply them to rowbytes.
-  Added UNIX manual pages libpng.3 (incorporating libpng.txt) and  png.5.
-
-Version 0.99d [February 11, 1998]
-  Renamed "far_to_near()" "png_far_to_near()"
-  Revised libpng.3
-  Version 99c "buffered" operations didn't work as intended.  Replaced them
-    with png_memcpy_check() and png_memset_check().
-  Added many "if (png_ptr == NULL) return" to quell compiler warnings about
-    unused png_ptr, mostly in pngget.c and pngset.c.
-  Check for overlength tRNS chunk present when indexed-color PLTE is read.
-  Cleaned up spelling errors in libpng.3/libpng.txt
-  Corrected a problem with png_get_tRNS() which returned undefined trans array
-
-Version 0.99e [February 28, 1998]
-  Corrected png_get_tRNS() again.
-  Add parentheses for easier reading of pngget.c, fixed "||" should be "&&".
-  Touched up example.c to make more of it compileable, although the entire
-    file still can't be compiled (Willem van Schaik)
-  Fixed a bug in png_do_shift() (Bryan Tsai)
-  Added a space in png.h prototype for png_write_chunk_start()
-  Replaced pngtest.png with one created with zlib 1.1.1
-  Changed pngtest to report PASS even when file size is different (Jean-loup G.)
-  Corrected some logic errors in png_do_invert_alpha() (Chris Patterson)
-
-Version 0.99f [March 5, 1998]
-  Corrected a bug in pngpread() introduced in version 99c (Kevin Bracey)
-  Moved makefiles into a "scripts" directory, and added INSTALL instruction file
-  Added makefile.os2 and pngos2.def (A. Zabolotny) and makefile.s2x (W. Sebok)
-  Added pointers to "note on libpng versions" in makefile.lnx and README
-  Added row callback feature when reading and writing nonprogressive rows
-    and added a test of this feature in pngtest.c
-  Added user transform callbacks, with test of the feature in pngtest.c
-
-Version 0.99g [March 6, 1998, morning]
-  Minor changes to pngtest.c to suppress compiler warnings.
-  Removed "beta" language from documentation.
-
-Version 0.99h [March 6, 1998, evening]
-  Minor changes to previous minor changes to pngtest.c
-  Changed PNG_READ_NOT_FULLY_SUPPORTED to PNG_READ_TRANSFORMS_NOT_SUPPORTED
-    and added PNG_PROGRESSIVE_READ_NOT_SUPPORTED macro
-  Added user transform capability
-
-Version 1.00 [March 7, 1998]
-  Changed several typedefs in pngrutil.c
-  Added makefile.wat (Pawel Mrochen), updated makefile.tc3 (Willem van Schaik)
-  Replaced "while(1)" with "for(;;)"
-  Added PNGARG() to prototypes in pngtest.c and removed some prototypes
-  Updated some of the makefiles (Tom Lane)
-  Changed some typedefs (s_start, etc.) in pngrutil.c
-  Fixed dimensions of "short_months" array in pngwrite.c
-  Replaced ansi2knr.c with the one from jpeg-v6
-
-Version 1.0.0 [March 8, 1998]
-  Changed name from 1.00 to 1.0.0 (Adam Costello)
-  Added smakefile.ppc (with SCOPTIONS.ppc) for Amiga PPC (Andreas Kleinert)
-
-Version 1.0.0a [March 9, 1998]
-  Fixed three bugs in pngrtran.c to make gamma+background handling consistent
-    (Greg Roelofs)
-  Changed format of the PNG_LIBPNG_VER integer to xyyzz instead of xyz
-    for major, minor, and bugfix releases.  This is 10001. (Adam Costello,
-    Tom Lane)
-  Make months range from 1-12 in png_convert_to_rfc1123
-
-Version 1.0.0b [March 13, 1998]
-  Quieted compiler complaints about two empty "for" loops in pngrutil.c
-  Minor changes to makefile.s2x
-  Removed #ifdef/#endif around a png_free() in pngread.c
-
-Version 1.0.1 [March 14, 1998]
-  Changed makefile.s2x to reduce security risk of using a relative pathname
-  Fixed some typos in the documentation (Greg).
-  Fixed a problem with value of "channels" returned by png_read_update_info()
-
-Version 1.0.1a [April 21, 1998]
-  Optimized Paeth calculations by replacing abs() function calls with intrinsics
-  plus other loop optimizations. Improves avg decoding speed by about 20%.
-  Commented out i386istic "align" compiler flags in makefile.lnx.
-  Reduced the default warning level in some makefiles, to make them consistent.
-  Removed references to IJG and JPEG in the ansi2knr.c copyright statement.
-  Fixed a bug in png_do_strip_filler with XXRRGGBB => RRGGBB transformation.
-  Added grayscale and 16-bit capability to png_do_read_filler().
-  Fixed a bug in pngset.c, introduced in version 0.99c, that sets rowbytes
-    too large when writing an image with bit_depth < 8 (Bob Dellaca).
-  Corrected some bugs in the experimental weighted filtering heuristics.
-  Moved a misplaced pngrutil code block that truncates tRNS if it has more
-    than num_palette entries -- test was done before num_palette was defined.
-  Fixed a png_convert_to_rfc1123() bug that converts day 31 to 0 (Steve Eddins).
-  Changed compiler flags in makefile.wat for better optimization
-    (Pawel Mrochen).
-
-Version 1.0.1b [May 2, 1998]
-  Relocated png_do_gray_to_rgb() within png_do_read_transformations() (Greg).
-  Relocated the png_composite macros from pngrtran.c to png.h (Greg).
-  Added makefile.sco (contributed by Mike Hopkirk).
-  Fixed two bugs (missing definitions of "istop") introduced in libpng-1.0.1a.
-  Fixed a bug in pngrtran.c that would set channels=5 under some circumstances.
-  More work on the Paeth-filtering, achieving imperceptible speedup
-    (A Kleinert).
-  More work on loop optimization which may help when compiled with C++
-    compilers.
-  Added warnings when people try to use transforms they've defined out.
-  Collapsed 4 "i" and "c" loops into single "i" loops in pngrtran and pngwtran.
-  Revised paragraph about png_set_expand() in libpng.txt and libpng.3 (Greg)
-
-Version 1.0.1c [May 11, 1998]
-  Fixed a bug in pngrtran.c (introduced in libpng-1.0.1a) where the masks for
-    filler bytes should have been 0xff instead of 0xf.
-  Added max_pixel_depth=32 in pngrutil.c when using FILLER with palette images.
-  Moved PNG_WRITE_WEIGHTED_FILTER_SUPPORTED and PNG_WRITE_FLUSH_SUPPORTED
-    out of the PNG_WRITE_TRANSFORMS_NOT_SUPPORTED block of pngconf.h
-  Added "PNG_NO_WRITE_TRANSFORMS" etc., as alternatives for *_NOT_SUPPORTED,
-    for consistency, in pngconf.h
-  Added individual "ifndef PNG_NO_[CAPABILITY]" in pngconf.h to make it easier
-    to remove unwanted capabilities via the compile line
-  Made some corrections to grammar (which, it's) in documentation (Greg).
-  Corrected example.c, use of row_pointers in png_write_image().
-
-Version 1.0.1d [May 24, 1998]
-  Corrected several statements that used side effects illegally in pngrutil.c
-    and pngtrans.c, that were introduced in version 1.0.1b
-  Revised png_read_rows() to avoid repeated if-testing for NULL (A Kleinert)
-  More corrections to example.c, use of row_pointers in png_write_image()
-    and png_read_rows().
-  Added pngdll.mak and pngdef.pas to scripts directory, contributed by
-    Bob Dellaca, to make a png32bd.dll with Borland C++ 4.5
-  Fixed error in example.c with png_set_text: num_text is 3, not 2 (Guido V.)
-  Changed several loops from count-down to count-up, for consistency.
-
-Version 1.0.1e [June 6, 1998]
-  Revised libpng.txt and libpng.3 description of png_set_read|write_fn(), and
-    added warnings when people try to set png_read_fn and png_write_fn in
-    the same structure.
-  Added a test such that png_do_gamma will be done when num_trans==0
-    for truecolor images that have defined a background.  This corrects an
-    error that was introduced in libpng-0.90 that can cause gamma processing
-    to be skipped.
-  Added tests in png.h to include "trans" and "trans_values" in structures
-    when PNG_READ_BACKGROUND_SUPPORTED or PNG_READ_EXPAND_SUPPORTED is defined.
-  Add png_free(png_ptr->time_buffer) in png_destroy_read_struct()
-  Moved png_convert_to_rfc_1123() from pngwrite.c to png.c
-  Added capability for user-provided malloc_fn() and free_fn() functions,
-    and revised pngtest.c to demonstrate their use, replacing the
-    PNGTEST_DEBUG_MEM feature.
-  Added makefile.w32, for Microsoft C++ 4.0 and later (Tim Wegner).
-
-Version 1.0.2 [June 14, 1998]
-  Fixed two bugs in makefile.bor .
-
-Version 1.0.2a [December 30, 1998]
-  Replaced and extended code that was removed from png_set_filler() in 1.0.1a.
-  Fixed a bug in png_do_filler() that made it fail to write filler bytes in
-    the left-most pixel of each row (Kevin Bracey).
-  Changed "static pngcharp tIME_string" to "static char tIME_string[30]"
-    in pngtest.c (Duncan Simpson).
-  Fixed a bug in pngtest.c that caused pngtest to try to write a tIME chunk
-    even when no tIME chunk was present in the source file.
-  Fixed a problem in pngrutil.c: gray_to_rgb didn't always work with 16-bit.
-  Fixed a problem in png_read_push_finish_row(), which would not skip some
-    passes that it should skip, for images that are less than 3 pixels high.
-  Interchanged the order of calls to png_do_swap() and png_do_shift()
-    in pngwtran.c (John Cromer).
-  Added #ifdef PNG_DEBUG/#endif surrounding use of PNG_DEBUG in png.h .
-  Changed "bad adaptive filter type" from error to warning in pngrutil.c .
-  Fixed a documentation error about default filtering with 8-bit indexed-color.
-  Separated the PNG_NO_STDIO macro into PNG_NO_STDIO and PNG_NO_CONSOLE_IO
-    (L. Peter Deutsch).
-  Added png_set_rgb_to_gray() and png_get_rgb_to_gray_status() functions.
-  Added png_get_copyright() and png_get_header_version() functions.
-  Revised comments on png_set_progressive_read_fn() in libpng.txt and example.c
-  Added information about debugging in libpng.txt and libpng.3 .
-  Changed "ln -sf" to "ln -s -f" in makefile.s2x, makefile.lnx, and
-    makefile.sco.
-  Removed lines after Dynamic Dependencies" in makefile.aco .
-  Revised makefile.dec to make a shared library (Jeremie Petit).
-  Removed trailing blanks from all files.
-
-Version 1.0.2a [January 6, 1999]
-  Removed misplaced #endif and #ifdef PNG_NO_EXTERN near the end of png.h
-  Added "if" tests to silence complaints about unused png_ptr in png.h and png.c
-  Changed "check_if_png" function in example.c to return true (nonzero) if PNG.
-  Changed libpng.txt to demonstrate png_sig_cmp() instead of png_check_sig()
-    which is obsolete.
-
-Version 1.0.3 [January 14, 1999]
-  Added makefile.hux, for Hewlett Packard HPUX 10.20 and 11.00 (Jim Rice)
-  Added a statement of Y2K compliance in png.h, libpng.3, and Y2KINFO.
-
-Version 1.0.3a [August 12, 1999]
-  Added check for PNG_READ_INTERLACE_SUPPORTED in pngread.c; issue a warning
-    if an attempt is made to read an interlaced image when it's not supported.
-  Added check if png_ptr->trans is defined before freeing it in pngread.c
-  Modified the Y2K statement to include versions back to version 0.71
-  Fixed a bug in the check for valid IHDR bit_depth/color_types in pngrutil.c
-  Modified makefile.wat (added -zp8 flag, ".symbolic", changed some comments)
-  Replaced leading blanks with tab characters in makefile.hux
-  Changed "dworkin.wustl.edu" to "ccrc.wustl.edu" in various documents.
-  Changed (float)red and (float)green to (double)red, (double)green
-    in png_set_rgb_to_gray() to avoid "promotion" problems in AIX.
-  Fixed a bug in pngconf.h that omitted <stdio.h> when PNG_DEBUG==0 (K Bracey).
-  Reformatted libpng.3 and libpngpf.3 with proper fonts (script by J. vanZandt).
-  Updated documentation to refer to the PNG-1.2 specification.
-  Removed ansi2knr.c and left pointers to the latest source for ansi2knr.c
-    in makefile.knr, INSTALL, and README (L. Peter Deutsch)
-  Fixed bugs in calculation of the length of rowbytes when adding alpha
-    channels to 16-bit images, in pngrtran.c (Chris Nokleberg)
-  Added function png_set_user_transform_info() to store user_transform_ptr,
-    user_depth, and user_channels into the png_struct, and a function
-    png_get_user_transform_ptr() to retrieve the pointer (Chris Nokleberg)
-  Added function png_set_empty_plte_permitted() to make libpng useable
-    in MNG applications.
-  Corrected the typedef for png_free_ptr in png.h (Jesse Jones).
-  Correct gamma with srgb is 45455 instead of 45000 in pngrutil.c, to be
-    consistent with PNG-1.2, and allow variance of 500 before complaining.
-  Added assembler code contributed by Intel in file pngvcrd.c and modified
-    makefile.w32 to use it (Nirav Chhatrapati, INTEL Corporation,
-    Gilles Vollant)
-  Changed "ln -s -f" to "ln -f -s" in the makefiles to make Solaris happy.
-  Added some aliases for png_set_expand() in pngrtran.c, namely
-    png_set_expand_PLTE(), png_set_expand_depth(), and png_set_expand_tRNS()
-    (Greg Roelofs, in "PNG: The Definitive Guide").
-  Added makefile.beo for BEOS on X86, contributed by Sander Stok.
-
-Version 1.0.3b [August 26, 1999]
-  Replaced 2147483647L several places with PNG_MAX_UINT macro, defined in png.h
-  Changed leading blanks to tabs in all makefiles.
-  Define PNG_USE_PNGVCRD in makefile.w32, to get MMX assembler code.
-  Made alternate versions of  png_set_expand() in pngrtran.c, namely
-    png_set_gray_1_2_4_to_8, png_set_palette_to_rgb, and png_set_tRNS_to_alpha
-    (Greg Roelofs, in "PNG: The Definitive Guide").  Deleted the 1.0.3a aliases.
-  Relocated start of 'extern "C"' block in png.h so it doesn't include pngconf.h
-  Revised calculation of num_blocks in pngmem.c to avoid a potentially
-    negative shift distance, whose results are undefined in the C language.
-  Added a check in pngset.c to prevent writing multiple tIME chunks.
-  Added a check in pngwrite.c to detect invalid small window_bits sizes.
-
-Version 1.0.3d [September 4, 1999]
-  Fixed type casting of igamma in pngrutil.c
-  Added new png_expand functions to scripts/pngdef.pas and pngos2.def
-  Added a demo read_user_transform_fn that examines the row filters in pngtest.c
-
-Version 1.0.4 [September 24, 1999, not distributed publicly]
-  Define PNG_ALWAYS_EXTERN in pngconf.h if __STDC__ is defined
-  Delete #define PNG_INTERNAL and include "png.h" from pngasmrd.h
-  Made several minor corrections to pngtest.c
-  Renamed the makefiles with longer but more user friendly extensions.
-  Copied the PNG copyright and license to a separate LICENSE file.
-  Revised documentation, png.h, and example.c to remove reference to
-    "viewing_gamma" which no longer appears in the PNG specification.
-  Revised pngvcrd.c to use MMX code for interlacing only on the final pass.
-  Updated pngvcrd.c to use the faster C filter algorithms from libpng-1.0.1a
-  Split makefile.win32vc into two versions, makefile.vcawin32 (uses MMX
-    assembler code) and makefile.vcwin32 (doesn't).
-  Added a CPU timing report to pngtest.c (enabled by defining PNGTEST_TIMING)
-  Added a copy of pngnow.png to the distribution.
-
-Version 1.0.4a [September 25, 1999]
-  Increase max_pixel_depth in pngrutil.c if a user transform needs it.
-  Changed several division operations to right-shifts in pngvcrd.c
-
-Version 1.0.4b [September 30, 1999]
-  Added parentheses in line 3732 of pngvcrd.c
-  Added a comment in makefile.linux warning about buggy -O3 in pgcc 2.95.1
-
-Version 1.0.4c [October 1, 1999]
-  Added a "png_check_version" function in png.c and pngtest.c that will generate
-    a helpful compiler error if an old png.h is found in the search path.
-  Changed type of png_user_transform_depth|channels from int to png_byte.
-  Added "Libpng is OSI Certified Open Source Software" statement to png.h
-
-Version 1.0.4d [October 6, 1999]
-  Changed 0.45 to 0.45455 in png_set_sRGB()
-  Removed unused PLTE entries from pngnow.png
-  Re-enabled some parts of pngvcrd.c (png_combine_row) that work properly.
-
-Version 1.0.4e [October 10, 1999]
-  Fixed sign error in pngvcrd.c (Greg Roelofs)
-  Replaced some instances of memcpy with simple assignments in pngvcrd (GR-P)
-
-Version 1.0.4f [October 15, 1999]
-  Surrounded example.c code with #if 0 .. #endif to prevent people from
-    inadvertently trying to compile it.
-  Changed png_get_header_version() from a function to a macro in png.h
-  Added type casting mostly in pngrtran.c and pngwtran.c
-  Removed some pointless "ptr = NULL" in pngmem.c
-  Added a "contrib" directory containing the source code from Greg's book.
-
-Version 1.0.5 [October 15, 1999]
-  Minor editing of the INSTALL and README files.
-
-Version 1.0.5a [October 23, 1999]
-  Added contrib/pngsuite and contrib/pngminus (Willem van Schaik)
-  Fixed a typo in the png_set_sRGB() function call in example.c (Jan Nijtmans)
-  Further optimization and bugfix of pngvcrd.c
-  Revised pngset.c so that it does not allocate or free memory in the user's
-    text_ptr structure.  Instead, it makes its own copy.
-  Created separate write_end_info_struct in pngtest.c for a more severe test.
-  Added code in pngwrite.c to free info_ptr->text[i].key to stop a memory leak.
-
-Version 1.0.5b [November 23, 1999]
-  Moved PNG_FLAG_HAVE_CHUNK_HEADER, PNG_FLAG_BACKGROUND_IS_GRAY and
-    PNG_FLAG_WROTE_tIME from flags to mode.
-  Added png_write_info_before_PLTE() function.
-  Fixed some typecasting in contrib/gregbook/*.c
-  Updated scripts/makevms.com and added makevms.com to contrib/gregbook
-    and contrib/pngminus (Martin Zinser)
-
-Version 1.0.5c [November 26, 1999]
-  Moved png_get_header_version from png.h to png.c, to accommodate ansi2knr.
-  Removed all global arrays (according to PNG_NO_GLOBAL_ARRAYS macro), to
-    accommodate making DLL's: Moved usr_png_ver from global variable to function
-    png_get_header_ver() in png.c.  Moved png_sig to png_sig_bytes in png.c and
-    eliminated use of png_sig in pngwutil.c.  Moved the various png_CHNK arrays
-    into pngtypes.h.  Eliminated use of global png_pass arrays.  Declared the
-    png_CHNK and png_pass arrays to be "const".  Made the global arrays
-    available to applications (although none are used in libpng itself) when
-    PNG_NO_GLOBAL_ARRAYS is not defined or when PNG_GLOBAL_ARRAYS is defined.
-  Removed some extraneous "-I" from contrib/pngminus/makefile.std
-  Changed the PNG_sRGB_INTENT macros in png.h to be consistent with PNG-1.2.
-  Change PNG_SRGB_INTENT to PNG_sRGB_INTENT in libpng.txt and libpng.3
-
-Version 1.0.5d [November 29, 1999]
-  Add type cast (png_const_charp) two places in png.c
-  Eliminated pngtypes.h; use macros instead to declare PNG_CHNK arrays.
-  Renamed "PNG_GLOBAL_ARRAYS" to "PNG_USE_GLOBAL_ARRAYS" and made available
-    to applications a macro "PNG_USE_LOCAL_ARRAYS".
-  comment out (with #ifdef) all the new declarations when
-    PNG_USE_GLOBAL_ARRAYS is defined.
-  Added PNG_EXPORT_VAR macro to accommodate making DLL's.
-
-Version 1.0.5e [November 30, 1999]
-  Added iCCP, iTXt, and sPLT support; added "lang" member to the png_text
-    structure; refactored the inflate/deflate support to make adding new chunks
-    with trailing compressed parts easier in the future, and added new functions
-    png_free_iCCP, png_free_pCAL, png_free_sPLT, png_free_text, png_get_iCCP,
-    png_get_spalettes, png_set_iCCP, png_set_spalettes (Eric S. Raymond).
-    NOTE: Applications that write text chunks MUST define png_text->lang
-    before calling png_set_text(). It must be set to NULL if you want to
-    write tEXt or zTXt chunks.  If you want your application to be able to
-    run with older versions of libpng, use
-
-      #ifdef PNG_iTXt_SUPPORTED
-         png_text[i].lang = NULL;
-      #endif
-
-  Changed png_get_oFFs() and png_set_oFFs() to use signed rather than unsigned
-    offsets (Eric S. Raymond).
-  Combined PNG_READ_cHNK_SUPPORTED and PNG_WRITE_cHNK_SUPPORTED macros into
-    PNG_cHNK_SUPPORTED and combined the three types of PNG_text_SUPPORTED
-    macros, leaving the separate macros also available.
-  Removed comments on #endifs at the end of many short, non-nested #if-blocks.
-
-Version 1.0.5f [December 6, 1999]
-  Changed makefile.solaris to issue a warning about potential problems when
-    the ucb "ld" is in the path ahead of the ccs "ld".
-  Removed "- [date]" from the "synopsis" line in libpng.3 and libpngpf.3.
-  Added sCAL chunk support (Eric S. Raymond).
-
-Version 1.0.5g [December 7, 1999]
-  Fixed "png_free_spallettes" typo in png.h
-  Added code to handle new chunks in pngpread.c
-  Moved PNG_CHNK string macro definitions outside of PNG_NO_EXTERN block
-  Added "translated_key" to png_text structure and png_write_iTXt().
-  Added code in pngwrite.c to work around a newly discovered zlib bug.
-
-Version 1.0.5h [December 10, 1999]
-  NOTE: regarding the note for version 1.0.5e, the following must also
-    be included in your code:
-        png_text[i].translated_key = NULL;
-  Unknown chunk handling is now supported.
-  Option to eliminate all floating point support was added.  Some new
-    fixed-point functions such as png_set_gAMA_fixed() were added.
-  Expanded tabs and removed trailing blanks in source files.
-
-Version 1.0.5i [December 13, 1999]
-  Added some type casts to silence compiler warnings.
-  Renamed "png_free_spalette" to "png_free_spalettes" for consistency.
-  Removed leading blanks from a #define in pngvcrd.c
-  Added some parameters to the new png_set_keep_unknown_chunks() function.
-  Added a test for up->location != 0 in the first instance of writing
-    unknown chunks in pngwrite.c
-  Changed "num" to "i" in png_free_spalettes() and png_free_unknowns() to
-    prevent recursion.
-  Added png_free_hIST() function.
-  Various patches to fix bugs in the sCAL and integer cHRM processing,
-    and to add some convenience macros for use with sCAL.
-
-Version 1.0.5j [December 21, 1999]
-  Changed "unit" parameter of png_write_sCAL from png_byte to int, to work
-    around buggy compilers.
-  Added new type "png_fixed_point" for integers that hold float*100000 values
-  Restored backward compatibility of tEXt/zTXt chunk processing:
-    Restored the first four members of png_text to the same order as v.1.0.5d.
-    Added members "lang_key" and "itxt_length" to png_text struct.  Set
-    text_length=0 when "text" contains iTXt data.  Use the "compression"
-    member to distinguish among tEXt/zTXt/iTXt types.  Added
-    PNG_ITXT_COMPRESSION_NONE (1) and PNG_ITXT_COMPRESSION_zTXt(2) macros.
-    The "Note" above, about backward incompatibility of libpng-1.0.5e, no
-    longer applies.
-  Fixed png_read|write_iTXt() to read|write parameters in the right order,
-    and to write the iTXt chunk after IDAT if it appears in the end_ptr.
-  Added pnggccrd.c, version of pngvcrd.c Intel assembler for gcc (Greg Roelofs)
-  Reversed the order of trying to write floating-point and fixed-point gAMA.
-
-Version 1.0.5k [December 27, 1999]
-  Added many parentheses, e.g., "if (a && b & c)" becomes "if (a && (b & c))"
-  Added png_handle_as_unknown() function (Glenn)
-  Added png_free_chunk_list() function and chunk_list and num_chunk_list members
-    of png_ptr.
-  Eliminated erroneous warnings about multiple sPLT chunks and sPLT-after-PLTE.
-  Fixed a libpng-1.0.5h bug in pngrutil.c that was issuing erroneous warnings
-    about ignoring incorrect gAMA with sRGB (gAMA was in fact not ignored)
-  Added png_free_tRNS(); png_set_tRNS() now malloc's its own trans array (ESR).
-  Define png_get_int_32 when oFFs chunk is supported as well as when pCAL is.
-  Changed type of proflen from png_int_32 to png_uint_32 in png_get_iCCP().
-
-Version 1.0.5l [January 1, 2000]
-  Added functions png_set_read_user_chunk_fn() and png_get_user_chunk_ptr()
-    for setting a callback function to handle unknown chunks and for
-    retrieving the associated user pointer (Glenn).
-
-Version 1.0.5m [January 7, 2000]
-  Added high-level functions png_read_png(), png_write_png(), png_free_pixels().
-
-Version 1.0.5n [January 9, 2000]
-  Added png_free_PLTE() function, and modified png_set_PLTE() to malloc its
-    own memory for info_ptr->palette.  This makes it safe for the calling
-    application to free its copy of the palette any time after it calls
-    png_set_PLTE().
-
-Version 1.0.5o [January 20, 2000]
-  Cosmetic changes only (removed some trailing blanks and TABs)
-
-Version 1.0.5p [January 31, 2000]
-  Renamed pngdll.mak to makefile.bd32
-  Cosmetic changes in pngtest.c
-
-Version 1.0.5q [February 5, 2000]
-  Relocated the makefile.solaris warning about PATH problems.
-  Fixed pngvcrd.c bug by pushing/popping registers in mmxsupport (Bruce Oberg)
-  Revised makefile.gcmmx
-  Added PNG_SETJMP_SUPPORTED, PNG_SETJMP_NOT_SUPPORTED, and PNG_ABORT() macros
-
-Version 1.0.5r [February 7, 2000]
-  Removed superfluous prototype for png_get_itxt from png.h
-  Fixed a bug in pngrtran.c that improperly expanded the background color.
-  Return *num_text=0 from png_get_text() when appropriate, and fix documentation
-    of png_get_text() in libpng.txt/libpng.3.
-
-Version 1.0.5s [February 18, 2000]
-  Added "png_jmp_env()" macro to pngconf.h, to help people migrate to the
-    new error handler that's planned for the next libpng release, and changed
-    example.c, pngtest.c, and contrib programs to use this macro.
-  Revised some of the DLL-export macros in pngconf.h (Greg Roelofs)
-  Fixed a bug in png_read_png() that caused it to fail to expand some images
-    that it should have expanded.
-  Fixed some mistakes in the unused and undocumented INCH_CONVERSIONS functions
-    in pngget.c
-  Changed the allocation of palette, history, and trans arrays back to
-    the version 1.0.5 method (linking instead of copying) which restores
-    backward compatibility with version 1.0.5.  Added some remarks about
-    that in example.c.  Added "free_me" member to info_ptr and png_ptr
-    and added png_free_data() function.
-  Updated makefile.linux and makefile.gccmmx to make directories conditionally.
-  Made cosmetic changes to pngasmrd.h
-  Added png_set_rows() and png_get_rows(), for use with png_read|write_png().
-  Modified png_read_png() to allocate info_ptr->row_pointers only if it
-    hasn't already been allocated.
-
-Version 1.0.5t [March 4, 2000]
-  Changed png_jmp_env() migration aiding macro to png_jmpbuf().
-  Fixed "interlace" typo (should be "interlaced") in contrib/gregbook/read2-x.c
-  Fixed bug with use of PNG_BEFORE_IHDR bit in png_ptr->mode, introduced when
-    PNG_FLAG_HAVE_CHUNK_HEADER was moved into png_ptr->mode in version 1.0.5b
-  Files in contrib/gregbook were revised to use png_jmpbuf() and to select
-    a 24-bit visual if one is available, and to allow abbreviated options.
-  Files in contrib/pngminus were revised to use the png_jmpbuf() macro.
-  Removed spaces in makefile.linux and makefile.gcmmx, introduced in 1.0.5s
-
-Version 1.0.5u [March 5, 2000]
-  Simplified the code that detects old png.h in png.c and pngtest.c
-  Renamed png_spalette (_p, _pp) to png_sPLT_t (_tp, _tpp)
-  Increased precision of rgb_to_gray calculations from 8 to 15 bits and
-    added png_set_rgb_to_gray_fixed() function.
-  Added makefile.bc32 (32-bit Borland C++, C mode)
-
-Version 1.0.5v [March 11, 2000]
-  Added some parentheses to the png_jmpbuf macro definition.
-  Updated references to the zlib home page, which has moved to freesoftware.com.
-  Corrected bugs in documentation regarding png_read_row() and png_write_row().
-  Updated documentation of png_rgb_to_gray calculations in libpng.3/libpng.txt.
-  Renamed makefile.borland,turboc3 back to makefile.bor,tc3 as in version 1.0.3,
-    revised borland makefiles; added makefile.ibmvac3 and makefile.gcc (Cosmin)
-
-Version 1.0.6 [March 20, 2000]
-  Minor revisions of makefile.bor, libpng.txt, and gregbook/rpng2-win.c
-  Added makefile.sggcc (SGI IRIX with gcc)
-
-Version 1.0.6d [April 7, 2000]
-  Changed sprintf() to strcpy() in png_write_sCAL_s() to work without STDIO
-  Added data_length parameter to png_decompress_chunk() function
-  Revised documentation to remove reference to abandoned png_free_chnk functions
-  Fixed an error in png_rgb_to_gray_fixed()
-  Revised example.c, usage of png_destroy_write_struct().
-  Renamed makefile.ibmvac3 to makefile.ibmc, added libpng.icc IBM project file
-  Added a check for info_ptr->free_me&PNG_FREE_TEXT when freeing text in png.c
-  Simplify png_sig_bytes() function to remove use of non-ISO-C strdup().
-
-Version 1.0.6e [April 9, 2000]
-  Added png_data_freer() function.
-  In the code that checks for over-length tRNS chunks, added check of
-    info_ptr->num_trans as well as png_ptr->num_trans (Matthias Benckmann)
-  Minor revisions of libpng.txt/libpng.3.
-  Check for existing data and free it if the free_me flag is set, in png_set_*()
-    and png_handle_*().
-  Only define PNG_WEIGHTED_FILTERS_SUPPORTED when PNG_FLOATING_POINT_SUPPORTED
-    is defined.
-  Changed several instances of PNG_NO_CONSOLE_ID to PNG_NO_STDIO in pngrutil.c
-    and mentioned the purposes of the two macros in libpng.txt/libpng.3.
-
-Version 1.0.6f [April 14, 2000]
-  Revised png_set_iCCP() and png_set_rows() to avoid prematurely freeing data.
-  Add checks in png_set_text() for NULL members of the input text structure.
-  Revised libpng.txt/libpng.3.
-  Removed superfluous prototype for png_set_iTXt from png.h
-  Removed "else" from pngread.c, after png_error(), and changed "0" to "length".
-  Changed several png_errors about malformed ancillary chunks to png_warnings.
-
-Version 1.0.6g [April 24, 2000]
-  Added png_pass-* arrays to pnggccrd.c when PNG_USE_LOCAL_ARRAYS is defined.
-  Relocated paragraph about png_set_background() in libpng.3/libpng.txt
-    and other revisions (Matthias Benckmann)
-  Relocated info_ptr->free_me, png_ptr->free_me, and other info_ptr and
-    png_ptr members to restore binary compatibility with libpng-1.0.5
-    (breaks compatibility with libpng-1.0.6).
-
-Version 1.0.6h [April 24, 2000]
-  Changed shared library so-number pattern from 2.x.y.z to xy.z (this builds
-    libpng.so.10 & libpng.so.10.6h instead of libpng.so.2 & libpng.so.2.1.0.6h)
-    This is a temporary change for test purposes.
-
-Version 1.0.6i [May 2, 2000]
-  Rearranged some members at the end of png_info and png_struct, to put
-    unknown_chunks_num and free_me within the original size of the png_structs
-    and free_me, png_read_user_fn, and png_free_fn within the original png_info,
-    because some old applications allocate the structs directly instead of
-    using png_create_*().
-  Added documentation of user memory functions in libpng.txt/libpng.3
-  Modified png_read_png so that it will use user_allocated row_pointers
-    if present, unless free_me directs that it be freed, and added description
-    of the use of png_set_rows() and png_get_rows() in libpng.txt/libpng.3.
-  Added PNG_LEGACY_SUPPORTED macro, and #ifdef out all new (since version
-    1.00) members of png_struct and png_info, to regain binary compatibility
-    when you define this macro.  Capabilities lost in this event
-    are user transforms (new in version 1.0.0),the user transform pointer
-    (new in version 1.0.2), rgb_to_gray (new in 1.0.5), iCCP, sCAL, sPLT,
-    the high-level interface, and unknown chunks support (all new in 1.0.6).
-    This was necessary because of old applications that allocate the structs
-    directly as authors were instructed to do in libpng-0.88 and earlier,
-    instead of using png_create_*().
-  Added modes PNG_CREATED_READ_STRUCT and PNG_CREATED_WRITE_STRUCT which
-    can be used to detect codes that directly allocate the structs, and
-    code to check these modes in png_read_init() and png_write_init() and
-    generate a libpng error if the modes aren't set and PNG_LEGACY_SUPPORTED
-    was not defined.
-  Added makefile.intel and updated makefile.watcom (Pawel Mrochen)
-
-Version 1.0.6j [May 3, 2000]
-  Overloaded png_read_init() and png_write_init() with macros that convert
-    calls to png_read_init_2() or png_write_init_2() that check the version
-    and structure sizes.
-
-Version 1.0.7beta11 [May 7, 2000]
-  Removed the new PNG_CREATED_READ_STRUCT and PNG_CREATED_WRITE_STRUCT modes
-    which are no longer used.
-  Eliminated the three new members of png_text when PNG_LEGACY_SUPPORTED is
-    defined or when neither PNG_READ_iTXt_SUPPORTED nor PNG_WRITE_iTXt_SUPPORTED
-    is defined.
-  Made PNG_NO_READ|WRITE_iTXt the default setting, to avoid memory
-    overrun when old applications fill the info_ptr->text structure directly.
-  Added PNGAPI macro, and added it to the definitions of all exported functions.
-  Relocated version macro definitions ahead of the includes of zlib.h and
-    pngconf.h in png.h.
-
-Version 1.0.7beta12 [May 12, 2000]
-  Revised pngset.c to avoid a problem with expanding the png_debug macro.
-  Deleted some extraneous defines from pngconf.h
-  Made PNG_NO_CONSOLE_IO the default condition when PNG_BUILD_DLL is defined.
-  Use MSC _RPTn debugging instead of fprintf if _MSC_VER is defined.
-  Added png_access_version_number() function.
-  Check for mask&PNG_FREE_CHNK (for TEXT, SCAL, PCAL) in png_free_data().
-  Expanded libpng.3/libpng.txt information about png_data_freer().
-
-Version 1.0.7beta14 [May 17, 2000] (beta13 was not published)
-  Changed pnggccrd.c and pngvcrd.c to handle bad adaptive filter types as
-    warnings instead of errors, as pngrutil.c does.
-  Set the PNG_INFO_IDAT valid flag in png_set_rows() so png_write_png()
-    will actually write IDATs.
-  Made the default PNG_USE_LOCAL_ARRAYS depend on PNG_DLL instead of WIN32.
-  Make png_free_data() ignore its final parameter except when freeing data
-    that can have multiple instances (text, sPLT, unknowns).
-  Fixed a new bug in png_set_rows().
-  Removed info_ptr->valid tests from png_free_data(), as in version 1.0.5.
-  Added png_set_invalid() function.
-  Fixed incorrect illustrations of png_destroy_write_struct() in example.c.
-
-Version 1.0.7beta15 [May 30, 2000]
-  Revised the deliberately erroneous Linux setjmp code in pngconf.h to produce
-    fewer error messages.
-  Rearranged checks for Z_OK to check the most likely path first in pngpread.c
-    and pngwutil.c.
-  Added checks in pngtest.c for png_create_*() returning NULL, and mentioned
-    in libpng.txt/libpng.3 the need for applications to check this.
-  Changed names of png_default_*() functions in pngtest to pngtest_*().
-  Changed return type of png_get_x|y_offset_*() from png_uint_32 to png_int_32.
-  Fixed some bugs in the unused PNG_INCH_CONVERSIONS functions in pngget.c
-  Set each pointer to NULL after freeing it in png_free_data().
-  Worked around a problem in pngconf.h; AIX's strings.h defines an "index"
-    macro that conflicts with libpng's png_color_16.index. (Dimitri
-    Papadapoulos)
-  Added "msvc" directory with MSVC++ project files (Simon-Pierre Cadieux).
-
-Version 1.0.7beta16 [June 4, 2000]
-  Revised the workaround of AIX string.h "index" bug.
-  Added a check for overlength PLTE chunk in pngrutil.c.
-  Added PNG_NO_POINTER_INDEXING macro to use array-indexing instead of pointer
-    indexing in pngrutil.c and pngwutil.c to accommodate a buggy compiler.
-  Added a warning in png_decompress_chunk() when it runs out of data, e.g.
-    when it tries to read an erroneous PhotoShop iCCP chunk.
-  Added PNG_USE_DLL macro.
-  Revised the copyright/disclaimer/license notice.
-  Added contrib/msvctest directory
-
-Version 1.0.7rc1 [June 9, 2000]
-  Corrected the definition of PNG_TRANSFORM_INVERT_ALPHA  (0x0400 not 0x0200)
-  Added contrib/visupng directory (Willem van Schaik)
-
-Version 1.0.7beta18 [June 23, 2000]
-  Revised PNGAPI definition, and pngvcrd.c to work with __GCC__
-    and do not redefine PNGAPI if it is passed in via a compiler directive.
-  Revised visupng/PngFile.c to remove returns from within the Try block.
-  Removed leading underscores from "_PNG_H" and "_PNG_SAVE_BSD_SOURCE" macros.
-  Updated contrib/visupng/cexcept.h to version 1.0.0.
-  Fixed bugs in pngwrite.c and pngwutil.c that prevented writing iCCP chunks.
-
-Version 1.0.7rc2 [June 28, 2000]
-  Updated license to include disclaimers required by UCITA.
-  Fixed "DJBPP" typo in pnggccrd.c introduced in beta18.
-
-Version 1.0.7 [July 1, 2000]
-  Revised the definition of "trans_values" in libpng.3/libpng.txt
-
-Version 1.0.8beta1 [July 8, 2000]
-  Added png_free(png_ptr, key) two places in pngpread.c to stop memory leaks.
-  Changed PNG_NO_STDIO to PNG_NO_CONSOLE_IO, several places in pngrutil.c and
-    pngwutil.c.
-  Changed PNG_EXPORT_VAR to use PNG_IMPEXP, in pngconf.h.
-  Removed unused "#include <assert.h>" from png.c
-  Added WindowsCE support.
-  Revised pnggccrd.c to work with gcc-2.95.2 and in the Cygwin environment.
-
-Version 1.0.8beta2 [July 10, 2000]
-  Added project files to the wince directory and made further revisions
-    of pngtest.c, pngrio.c, and pngwio.c in support of WindowsCE.
-
-Version 1.0.8beta3 [July 11, 2000]
-  Only set the PNG_FLAG_FREE_TRNS or PNG_FREE_TRNS flag in png_handle_tRNS()
-    for indexed-color input files to avoid potential double-freeing trans array
-    under some unusual conditions; problem was introduced in version 1.0.6f.
-  Further revisions to pngtest.c and files in the wince subdirectory.
-
-Version 1.0.8beta4 [July 14, 2000]
-  Added the files pngbar.png and pngbar.jpg to the distribution.
-  Added makefile.cygwin, and cygwin support in pngconf.h
-  Added PNG_NO_ZALLOC_ZERO macro (makes png_zalloc skip zeroing memory)
-
-Version 1.0.8rc1 [July 16, 2000]
-  Revised png_debug() macros and statements to eliminate compiler warnings.
-
-Version 1.0.8 [July 24, 2000]
-  Added png_flush() in pngwrite.c, after png_write_IEND().
-  Updated makefile.hpux to build a shared library.
-
-Version 1.0.9beta1 [November 10, 2000]
-  Fixed typo in scripts/makefile.hpux
-  Updated makevms.com in scripts and contrib/* and contrib/* (Martin Zinser)
-  Fixed sequence-point bug in contrib/pngminus/png2pnm (Martin Zinser)
-  Changed "cdrom.com" in documentation to "libpng.org"
-  Revised pnggccrd.c to get it all working, and updated makefile.gcmmx (Greg).
-  Changed type of "params" from voidp to png_voidp in png_read|write_png().
-  Make sure PNGAPI and PNG_IMPEXP are defined in pngconf.h.
-  Revised the 3 instances of WRITEFILE in pngtest.c.
-  Relocated "msvc" and "wince" project subdirectories into "dll" subdirectory.
-  Updated png.rc in dll/msvc project
-  Revised makefile.dec to define and use LIBPATH and INCPATH
-  Increased size of global png_libpng_ver[] array from 12 to 18 chars.
-  Made global png_libpng_ver[], png_sig[] and png_pass_*[] arrays const.
-  Removed duplicate png_crc_finish() from png_handle_bKGD() function.
-  Added a warning when application calls png_read_update_info() multiple times.
-  Revised makefile.cygwin
-  Fixed bugs in iCCP support in pngrutil.c and pngwutil.c.
-  Replaced png_set_empty_plte_permitted() with png_permit_mng_features().
-
-Version 1.0.9beta2 [November 19, 2000]
-  Renamed the "dll" subdirectory "projects".
-  Added borland project files to "projects" subdirectory.
-  Set VS_FF_PRERELEASE and VS_FF_PATCHED flags in msvc/png.rc when appropriate.
-  Add error message in png_set_compression_buffer_size() when malloc fails.
-
-Version 1.0.9beta3 [November 23, 2000]
-  Revised PNG_LIBPNG_BUILD_TYPE macro in png.h, used in the msvc project.
-  Removed the png_flush() in pngwrite.c that crashes some applications
-    that don't set png_output_flush_fn.
-  Added makefile.macosx and makefile.aix to scripts directory.
-
-Version 1.0.9beta4 [December 1, 2000]
-  Change png_chunk_warning to png_warning in png_check_keyword().
-  Increased the first part of msg buffer from 16 to 18 in png_chunk_error().
-
-Version 1.0.9beta5 [December 15, 2000]
-  Added support for filter method 64 (for PNG datastreams embedded in MNG).
-
-Version 1.0.9beta6 [December 18, 2000]
-  Revised png_set_filter() to accept filter method 64 when appropriate.
-  Added new PNG_HAVE_PNG_SIGNATURE bit to png_ptr->mode and use it to
-    help prevent applications from using MNG features in PNG datastreams.
-  Added png_permit_mng_features() function.
-  Revised libpng.3/libpng.txt.  Changed "filter type" to "filter method".
-
-Version 1.0.9rc1 [December 23, 2000]
-  Revised test for PNG_HAVE_PNG_SIGNATURE in pngrutil.c
-  Fixed error handling of unknown compression type in png_decompress_chunk().
-  In pngconf.h, define __cdecl when _MSC_VER is defined.
-
-Version 1.0.9beta7 [December 28, 2000]
-  Changed PNG_TEXT_COMPRESSION_zTXt to PNG_COMPRESSION_TYPE_BASE several places.
-  Revised memory management in png_set_hIST and png_handle_hIST in a backward
-    compatible manner.  PLTE and tRNS were revised similarly.
-  Revised the iCCP chunk reader to ignore trailing garbage.
-
-Version 1.0.9beta8 [January 12, 2001]
-  Moved pngasmrd.h into pngconf.h.
-  Improved handling of out-of-spec garbage iCCP chunks generated by PhotoShop.
-
-Version 1.0.9beta9 [January 15, 2001]
-  Added png_set_invalid, png_permit_mng_features, and png_mmx_supported to
-    wince and msvc project module definition files.
-  Minor revision of makefile.cygwin.
-  Fixed bug with progressive reading of narrow interlaced images in pngpread.c
-
-Version 1.0.9beta10 [January 16, 2001]
-  Do not typedef png_FILE_p in pngconf.h when PNG_NO_STDIO is defined.
-  Fixed "png_mmx_supported" typo in project definition files.
-
-Version 1.0.9beta11 [January 19, 2001]
-  Updated makefile.sgi to make shared library.
-  Removed png_mmx_support() function and disabled PNG_MNG_FEATURES_SUPPORTED
-    by default, for the benefit of DLL forward compatibility.  These will
-    be re-enabled in version 1.2.0.
-
-Version 1.0.9rc2 [January 22, 2001]
-  Revised cygwin support.
-
-Version 1.0.9 [January 31, 2001]
-  Added check of cygwin's ALL_STATIC in pngconf.h
-  Added "-nommx" parameter to contrib/gregbook/rpng2-win and rpng2-x demos.
-
-Version 1.0.10beta1 [March 14, 2001]
-  Revised makefile.dec, makefile.sgi, and makefile.sggcc; added makefile.hpgcc.
-  Reformatted libpng.3 to eliminate bad line breaks.
-  Added checks for _mmx_supported in the read_filter_row function of pnggccrd.c
-  Added prototype for png_mmx_support() near the top of pnggccrd.c
-  Moved some error checking from png_handle_IHDR to png_set_IHDR.
-  Added PNG_NO_READ_SUPPORTED and PNG_NO_WRITE_SUPPORTED macros.
-  Revised png_mmx_support() function in pnggccrd.c
-  Restored version 1.0.8 PNG_WRITE_EMPTY_PLTE_SUPPORTED behavior in pngwutil.c
-  Fixed memory leak in contrib/visupng/PngFile.c
-  Fixed bugs in png_combine_row() in pnggccrd.c and pngvcrd.c (C version)
-  Added warnings when retrieving or setting gamma=0.
-  Increased the first part of msg buffer from 16 to 18 in png_chunk_warning().
-
-Version 1.0.10rc1 [March 23, 2001]
-  Changed all instances of memcpy, strcpy, and strlen to png_memcpy, png_strcpy,
-    and png_strlen.
-  Revised png_mmx_supported() function in pnggccrd.c to return proper value.
-  Fixed bug in progressive reading (pngpread.c) with small images (height < 8).
-
-Version 1.0.10 [March 30, 2001]
-  Deleted extraneous space (introduced in 1.0.9) from line 42 of makefile.cygwin
-  Added beos project files (Chris Herborth)
-
-Version 1.0.11beta1 [April 3, 2001]
-  Added type casts on several png_malloc() calls (Dimitri Papadapoulos).
-  Removed a no-longer needed AIX work-around from pngconf.h
-  Changed several "//" single-line comments to C-style in pnggccrd.c
-
-Version 1.0.11beta2 [April 11, 2001]
-  Removed PNGAPI from several functions whose prototypes did not have PNGAPI.
-  Updated scripts/pngos2.def
-
-Version 1.0.11beta3 [April 14, 2001]
-  Added checking the results of many instances of png_malloc() for NULL
-
-Version 1.0.11beta4 [April 20, 2001]
-  Undid the changes from version 1.0.11beta3.  Added a check for NULL return
-    from user's malloc_fn().
-  Removed some useless type casts of the NULL pointer.
-  Added makefile.netbsd
-
-Version 1.0.11 [April 27, 2001]
-  Revised makefile.netbsd
-
-Version 1.0.12beta1 [May 14, 2001]
-  Test for Windows platform in pngconf.h when including malloc.h (Emmanuel Blot)
-  Updated makefile.cygwin and handling of Cygwin's ALL_STATIC in pngconf.h
-  Added some never-to-be-executed code in pnggccrd.c to quiet compiler warnings.
-  Eliminated the png_error about apps using png_read|write_init().  Instead,
-    libpng will reallocate the png_struct and info_struct if they are too small.
-    This retains future binary compatibility for old applications written for
-    libpng-0.88 and earlier.
-
-Version 1.2.0beta1 [May 6, 2001]
-  Bumped DLLNUM to 2.
-  Re-enabled PNG_MNG_FEATURES_SUPPORTED and enabled PNG_ASSEMBLER_CODE_SUPPORTED
-    by default.
-  Added runtime selection of MMX features.
-  Added png_set_strip_error_numbers function and related macros.
-
-Version 1.2.0beta2 [May 7, 2001]
-  Finished merging 1.2.0beta1 with version 1.0.11
-  Added a check for attempts to read or write PLTE in grayscale PNG datastreams.
-
-Version 1.2.0beta3 [May 17, 2001]
-  Enabled user memory function by default.
-  Modified png_create_struct so it passes user mem_ptr to user memory allocator.
-  Increased png_mng_features flag from png_byte to png_uint_32.
-  Bumped shared-library (so-number) and dll-number to 3.
-
-Version 1.2.0beta4 [June 23, 2001]
-  Check for missing profile length field in iCCP chunk and free chunk_data
-    in case of truncated iCCP chunk.
-  Bumped shared-library number to 3 in makefile.sgi and makefile.sggcc
-  Bumped dll-number from 2 to 3 in makefile.cygwin
-  Revised contrib/gregbook/rpng*-x.c to avoid a memory leak and to exit cleanly
-    if user attempts to run it on an 8-bit display.
-  Updated contrib/gregbook
-  Use png_malloc instead of png_zalloc to allocate palette in pngset.c
-  Updated makefile.ibmc
-  Added some typecasts to eliminate gcc 3.0 warnings.  Changed prototypes
-    of png_write_oFFS width and height from png_uint_32 to png_int_32.
-  Updated example.c
-  Revised prototypes for png_debug_malloc and png_debug_free in pngtest.c
-
-Version 1.2.0beta5 [August 8, 2001]
-  Revised contrib/gregbook
-  Revised makefile.gcmmx
-  Revised pnggccrd.c to conditionally compile some thread-unsafe code only
-    when PNG_THREAD_UNSAFE_OK is defined.
-  Added tests to prevent pngwutil.c from writing a bKGD or tRNS chunk with
-    value exceeding 2^bit_depth-1
-  Revised makefile.sgi and makefile.sggcc
-  Replaced calls to fprintf(stderr,...) with png_warning() in pnggccrd.c
-  Removed restriction that do_invert_mono only operate on 1-bit opaque files
-
-Version 1.2.0 [September 1, 2001]
-  Changed a png_warning() to png_debug() in pnggccrd.c
-  Fixed contrib/gregbook/rpng-x.c, rpng2-x.c to avoid crash with XFreeGC().
-
-Version 1.2.1beta1 [October 19, 2001]
-  Revised makefile.std in contrib/pngminus
-  Include background_1 in png_struct regardless of gamma support.
-  Revised makefile.netbsd and makefile.macosx, added makefile.darwin.
-  Revised example.c to provide more details about using row_callback().
-
-Version 1.2.1beta2 [October 25, 2001]
-  Added type cast to each NULL appearing in a function call, except for
-    WINCE functions.
-  Added makefile.so9.
-
-Version 1.2.1beta3 [October 27, 2001]
-  Removed type casts from all NULLs.
-  Simplified png_create_struct_2().
-
-Version 1.2.1beta4 [November 7, 2001]
-  Revised png_create_info_struct() and png_creat_struct_2().
-  Added error message if png_write_info() was omitted.
-  Type cast NULLs appearing in function calls when _NO_PROTO or
-    PNG_TYPECAST_NULL is defined.
-
-Version 1.2.1rc1 [November 24, 2001]
-  Type cast NULLs appearing in function calls except when PNG_NO_TYPECAST_NULL
-    is defined.
-  Changed typecast of "size" argument to png_size_t in pngmem.c calls to
-    the user malloc_fn, to agree with the prototype in png.h
-  Added a pop/push operation to pnggccrd.c, to preserve Eflag (Maxim Sobolev)
-  Updated makefile.sgi to recognize LIBPATH and INCPATH.
-  Updated various makefiles so "make clean" does not remove previous major
-    version of the shared library.
-
-Version 1.2.1rc2 [December 4, 2001]
-  Always allocate 256-entry internal palette, hist, and trans arrays, to
-    avoid out-of-bounds memory reference caused by invalid PNG datastreams.
-  Added a check for prefix_length > data_length in iCCP chunk handler.
-
-Version 1.2.1 [December 7, 2001]
-  None.
-
-Version 1.2.2beta1 [February 22, 2002]
-  Fixed a bug with reading the length of iCCP profiles (Larry Reeves).
-  Revised makefile.linux, makefile.gcmmx, and makefile.sgi to generate
-    libpng.a, libpng12.so (not libpng.so.3), and libpng12/png.h
-  Revised makefile.darwin to remove "-undefined suppress" option.
-  Added checks for gamma and chromaticity values over 21474.83, which exceed
-    the limit for PNG unsigned 32-bit integers when encoded.
-  Revised calls to png_create_read_struct() and png_create_write_struct()
-    for simpler debugging.
-  Revised png_zalloc() so zlib handles errors (uses PNG_FLAG_MALLOC_NULL_MEM_OK)
-
-Version 1.2.2beta2 [February 23, 2002]
-  Check chunk_length and idat_size for invalid (over PNG_MAX_UINT) lengths.
-  Check for invalid image dimensions in png_get_IHDR.
-  Added missing "fi;" in the install target of the SGI makefiles.
-  Added install-static to all makefiles that make shared libraries.
-  Always do gamma compensation when image is partially transparent.
-
-Version 1.2.2beta3 [March 7, 2002]
-  Compute background.gray and background_1.gray even when color_type is RGB
-    in case image gets reduced to gray later.
-  Modified shared-library makefiles to install pkgconfig/libpngNN.pc.
-  Export (with PNGAPI) png_zalloc, png_zfree, and png_handle_as_unknown
-  Removed unused png_write_destroy_info prototype from png.h
-  Eliminated incorrect use of width_mmx from pnggccrd.c in pixel_bytes == 8 case
-  Added install-shared target to all makefiles that make shared libraries.
-  Stopped a double free of palette, hist, and trans when not using free_me.
-  Added makefile.32sunu for Sun Ultra 32 and makefile.64sunu for Sun Ultra 64.
-
-Version 1.2.2beta4 [March 8, 2002]
-  Compute background.gray and background_1.gray even when color_type is RGB
-    in case image gets reduced to gray later (Jason Summers).
-  Relocated a misplaced /bin/rm in the "install-shared" makefile targets
-  Added PNG_1_0_X macro which can be used to build a 1.0.x-compatible library.
-
-Version 1.2.2beta5 [March 26, 2002]
-  Added missing PNGAPI to several function definitions.
-  Check for invalid bit_depth or color_type in png_get_IHDR(), and
-    check for missing PLTE or IHDR in png_push_read_chunk() (Matthias Clasen).
-  Revised iTXt support to accept NULL for lang and lang_key.
-  Compute gamma for color components of background even when color_type is gray.
-  Changed "()" to "{}" in scripts/libpng.pc.in.
-  Revised makefiles to put png.h and pngconf.h only in $prefix/include/libpngNN
-  Revised makefiles to make symlink to libpng.so.NN in addition to libpngNN.so
-
-Version 1.2.2beta6 [March 31, 2002]
-
-Version 1.0.13beta1 [March 31, 2002]
-  Prevent png_zalloc() from trying to memset memory that it failed to acquire.
-  Add typecasts of PNG_MAX_UINT in pngset_cHRM_fixed() (Matt Holgate).
-  Ensure that the right function (user or default) is used to free the
-    png_struct after an error in png_create_read_struct_2().
-
-Version 1.2.2rc1 [April 7, 2002]
-
-Version 1.0.13rc1 [April 7, 2002]
-  Save the ebx register in pnggccrd.c (Sami Farin)
-  Add "mem_ptr = png_ptr->mem_ptr" in png_destroy_write_struct() (Paul Gardner).
-  Updated makefiles to put headers in include/libpng and remove old include/*.h.
-
-Version 1.2.2 [April 15, 2002]
-
-Version 1.0.13 [April 15, 2002]
-  Revised description of png_set_filter() in libpng.3/libpng.txt.
-  Revised makefile.netbsd and added makefile.neNNbsd and makefile.freebsd
-
-Version 1.0.13patch01 [April 17, 2002]
-
-Version 1.2.2patch01 [April 17, 2002]
-  Changed ${PNGMAJ}.${PNGVER} bug to ${PNGVER} in makefile.sgi and
-    makefile.sggcc
-  Fixed VER -> PNGVER typo in makefile.macosx and added install-static to
-    install
-  Added install: target to makefile.32sunu and makefile.64sunu
-
-Version 1.0.13patch03 [April 18, 2002]
-
-Version 1.2.2patch03 [April 18, 2002]
-  Revised 15 makefiles to link libpng.a to libpngNN.a and the include libpng
-  subdirectory to libpngNN subdirectory without the full pathname.
-  Moved generation of libpng.pc from "install" to "all" in 15 makefiles.
-
-Version 1.2.3rc1 [April 28, 2002]
-  Added install-man target to 15 makefiles (Dimitri Papadopolous-Orfanos).
-  Added $(DESTDIR) feature to 24 makefiles (Tim Mooney)
-  Fixed bug with $prefix, should be $(prefix) in makefile.hpux.
-  Updated cygwin-specific portion of pngconf.h and revised makefile.cygwin
-  Added a link from libpngNN.pc to libpng.pc in 15 makefiles.
-  Added links from include/libpngNN/*.h to include/*.h in 24 makefiles.
-  Revised makefile.darwin to make relative links without full pathname.
-  Added setjmp() at the end of png_create_*_struct_2() in case user forgets
-    to put one in their application.
-  Restored png_zalloc() and png_zfree() prototypes to version 1.2.1 and
-    removed them from module definition files.
-
-Version 1.2.3rc2 [May 1, 2002]
-  Fixed bug in reporting number of channels in pngget.c and pngset.c,
-    that was introduced in version 1.2.2beta5.
-  Exported png_zalloc(), png_zfree(), png_default_read(), png_default_write(),
-    png_default_flush(), and png_push_fill_buffer() and included them in
-    module definition files.
-  Added "libpng.pc" dependency to the "install-shared" target in 15 makefiles.
-
-Version 1.2.3rc3 [May 1, 2002]
-  Revised prototype for png_default_flush()
-  Remove old libpng.pc and libpngNN.pc before installing new ones.
-
-Version 1.2.3rc4 [May 2, 2002]
-  Typos in *.def files (png_default_read|write -> png_default_read|write_data)
-  In makefiles, changed rm libpng.NN.pc to rm libpngNN.pc
-  Added libpng-config and libpngNN-config and modified makefiles to install
-    them.
-  Changed $(MANPATH) to $(DESTDIR)$(MANPATH) in makefiles
-  Added "Win32 DLL VB" configuration to projects/msvc/libpng.dsp
-
-Version 1.2.3rc5 [May 11, 2002]
-  Changed "error" and "message" in prototypes to "error_message" and
-    "warning_message" to avoid namespace conflict.
-  Revised 15 makefiles to build libpng-config from libpng-config-*.in
-  Once more restored png_zalloc and png_zfree to regular nonexported form.
-  Restored png_default_read|write_data, png_default_flush, png_read_fill_buffer
-    to nonexported form, but with PNGAPI, and removed them from module def
-    files.
-
-Version 1.2.3rc6 [May 14, 2002]
-  Removed "PNGAPI" from png_zalloc() and png_zfree() in png.c
-  Changed "Gz" to "Gd" in projects/msvc/libpng.dsp and zlib.dsp.
-  Removed leftover libpng-config "sed" script from four makefiles.
-  Revised libpng-config creating script in 16 makefiles.
-
-Version 1.2.3 [May 22, 2002]
-  Revised libpng-config target in makefile.cygwin.
-  Removed description of png_set_mem_fn() from documentation.
-  Revised makefile.freebsd.
-  Minor cosmetic changes to 15 makefiles, e.g., $(DI) = $(DESTDIR)/$(INCDIR).
-  Revised projects/msvc/README.txt
-  Changed -lpng to -lpngNN in LDFLAGS in several makefiles.
-
-Version 1.2.4beta1 [May 24, 2002]
-  Added libpng.pc and libpng-config to "all:" target in 16 makefiles.
-  Fixed bug in 16 makefiles: $(DESTDIR)/$(LIBPATH) to $(DESTDIR)$(LIBPATH)
-  Added missing "\" before closing double quote in makefile.gcmmx.
-  Plugged various memory leaks; added png_malloc_warn() and png_set_text_2()
-    functions.
-
-Version 1.2.4beta2 [June 25, 2002]
-  Plugged memory leak of png_ptr->current_text (Matt Holgate).
-  Check for buffer overflow before reading CRC in pngpread.c (Warwick Allison)
-  Added -soname to the loader flags in makefile.dec, makefile.sgi, and
-    makefile.sggcc.
-  Added "test-installed" target to makefile.linux, makefile.gcmmx,
-    makefile.sgi, and makefile.sggcc.
-
-Version 1.2.4beta3 [June 28, 2002]
-  Plugged memory leak of row_buf in pngtest.c when there is a png_error().
-  Detect buffer overflow in pngpread.c when IDAT is corrupted with extra data.
-  Added "test-installed" target to makefile.32sunu, makefile.64sunu,
-    makefile.beos, makefile.darwin, makefile.dec, makefile.macosx,
-    makefile.solaris, makefile.hpux, makefile.hpgcc, and makefile.so9.
-
-Version 1.2.4rc1 and 1.0.14rc1 [July 2, 2002]
-  Added "test-installed" target to makefile.cygwin and makefile.sco.
-  Revised pnggccrd.c to be able to back out version 1.0.x via PNG_1_0_X macro.
-
-Version 1.2.4 and 1.0.14 [July 8, 2002]
-  Changed png_warning() to png_error() when width is too large to process.
-
-Version 1.2.4patch01 [July 20, 2002]
-  Revised makefile.cygwin to use DLL number 12 instead of 13.
-
-Version 1.2.5beta1 [August 6, 2002]
-  Added code to contrib/gregbook/readpng2.c to ignore unused chunks.
-  Replaced toucan.png in contrib/gregbook (it has been corrupt since 1.0.11)
-  Removed some stray *.o files from contrib/gregbook.
-  Changed png_error() to png_warning() about "Too much data" in pngpread.c
-    and about "Extra compressed data" in pngrutil.c.
-  Prevent png_ptr->pass from exceeding 7 in png_push_finish_row().
-  Updated makefile.hpgcc
-  Updated png.c and pnggccrd.c handling of return from png_mmx_support()
-
-Version 1.2.5beta2 [August 15, 2002]
-  Only issue png_warning() about "Too much data" in pngpread.c when avail_in
-    is nonzero.
-  Updated makefiles to install a separate libpng.so.3 with its own rpath.
-
-Version 1.2.5rc1 and 1.0.15rc1 [August 24, 2002]
-  Revised makefiles to not remove previous minor versions of shared libraries.
-
-Version 1.2.5rc2 and 1.0.15rc2 [September 16, 2002]
-  Revised 13 makefiles to remove "-lz" and "-L$(ZLIBLIB)", etc., from shared
-    library loader directive.
-  Added missing "$OBJSDLL" line to makefile.gcmmx.
-  Added missing "; fi" to makefile.32sunu.
-
-Version 1.2.5rc3 and 1.0.15rc3 [September 18, 2002]
-  Revised libpng-config script.
-
-Version 1.2.5 and 1.0.15 [October 3, 2002]
-  Revised makefile.macosx, makefile.darwin, makefile.hpgcc, and makefile.hpux,
-    and makefile.aix.
-  Relocated two misplaced PNGAPI lines in pngtest.c
-
-Version 1.2.6beta1 [October 22, 2002]
-  Commented out warning about uninitialized mmx_support in pnggccrd.c.
-  Changed "IBMCPP__" flag to "__IBMCPP__" in pngconf.h.
-  Relocated two more misplaced PNGAPI lines in pngtest.c
-  Fixed memory overrun bug in png_do_read_filler() with 16-bit datastreams,
-    introduced in version 1.0.2.
-  Revised makefile.macosx, makefile.dec, makefile.aix, and makefile.32sunu.
-
-Version 1.2.6beta2 [November 1, 2002]
-  Added libpng-config "--ldopts" output.
-  Added "AR=ar" and "ARFLAGS=rc" and changed "ar rc" to "$(AR) $(ARFLAGS)"
-    in makefiles.
-
-Version 1.2.6beta3 [July 18, 2004]
-  Reverted makefile changes from version 1.2.6beta2 and some of the changes
-    from version 1.2.6beta1; these will be postponed until version 1.2.7.
-    Version 1.2.6 is going to be a simple bugfix release.
-  Changed the one instance of "ln -sf" to "ln -f -s" in each Sun makefile.
-  Fixed potential overrun in pngerror.c by using strncpy instead of memcpy.
-  Added "#!/bin/sh" at the top of configure, for recognition of the
-    'x' flag under Cygwin (Cosmin).
-  Optimized vacuous tests that silence compiler warnings, in png.c (Cosmin).
-  Added support for PNG_USER_CONFIG, in pngconf.h (Cosmin).
-  Fixed the special memory handler for Borland C under DOS, in pngmem.c
-    (Cosmin).
-  Removed some spurious assignments in pngrutil.c (Cosmin).
-  Replaced 65536 with 65536L, and 0xffff with 0xffffL, to silence warnings
-    on 16-bit platforms (Cosmin).
-  Enclosed shift op expressions in parentheses, to silence warnings (Cosmin).
-  Used proper type png_fixed_point, to avoid problems on 16-bit platforms,
-    in png_handle_sRGB() (Cosmin).
-  Added compression_type to png_struct, and optimized the window size
-    inside the deflate stream (Cosmin).
-  Fixed definition of isnonalpha(), in pngerror.c and pngrutil.c (Cosmin).
-  Fixed handling of unknown chunks that come after IDAT (Cosmin).
-  Allowed png_error() and png_warning() to work even if png_ptr == NULL
-    (Cosmin).
-  Replaced row_info->rowbytes with row_bytes in png_write_find_filter()
-    (Cosmin).
-  Fixed definition of PNG_LIBPNG_VER_DLLNUM (Simon-Pierre).
-  Used PNG_LIBPNG_VER and PNG_LIBPNG_VER_STRING instead of the hardcoded
-    values in png.c (Simon-Pierre, Cosmin).
-  Initialized png_libpng_ver[] with PNG_LIBPNG_VER_STRING (Simon-Pierre).
-  Replaced PNG_LIBPNG_VER_MAJOR with PNG_LIBPNG_VER_DLLNUM in png.rc
-    (Simon-Pierre).
-  Moved the definition of PNG_HEADER_VERSION_STRING near the definitions
-    of the other PNG_LIBPNG_VER_... symbols in png.h (Cosmin).
-  Relocated #ifndef PNGAPI guards in pngconf.h (Simon-Pierre, Cosmin).
-  Updated scripts/makefile.vc(a)win32 (Cosmin).
-  Updated the MSVC project (Simon-Pierre, Cosmin).
-  Updated the Borland C++ Builder project (Cosmin).
-  Avoided access to asm_flags in pngvcrd.c, if PNG_1_0_X is defined (Cosmin).
-  Commented out warning about uninitialized mmx_support in pngvcrd.c (Cosmin).
-  Removed scripts/makefile.bd32 and scripts/pngdef.pas (Cosmin).
-  Added extra guard around inclusion of Turbo C memory headers, in pngconf.h
-    (Cosmin).
-  Renamed projects/msvc/ to projects/visualc6/, and projects/borland/ to
-    projects/cbuilder5/ (Cosmin).
-  Moved projects/visualc6/png32ms.def to scripts/pngw32.def,
-    and projects/visualc6/png.rc to scripts/pngw32.rc (Cosmin).
-  Added projects/visualc6/pngtest.dsp; removed contrib/msvctest/ (Cosmin).
-  Changed line endings to DOS style in cbuilder5 and visualc6 files, even
-    in the tar.* distributions (Cosmin).
-  Updated contrib/visupng/VisualPng.dsp (Cosmin).
-  Updated contrib/visupng/cexcept.h to version 2.0.0 (Cosmin).
-  Added a separate distribution with "configure" and supporting files (Junichi).
-
-Version 1.2.6beta4 [July 28, 2004]
-  Added user ability to change png_size_t via a PNG_SIZE_T macro.
-  Added png_sizeof() and png_convert_size() functions.
-  Added PNG_SIZE_MAX (maximum value of a png_size_t variable.
-  Added check in png_malloc_default() for (size_t)size != (png_uint_32)size
-    which would indicate an overflow.
-  Changed sPLT failure action from png_error to png_warning and abandon chunk.
-  Changed sCAL and iCCP failures from png_error to png_warning and abandon.
-  Added png_get_uint_31(png_ptr, buf) function.
-  Added PNG_UINT_32_MAX macro.
-  Renamed PNG_MAX_UINT to PNG_UINT_31_MAX.
-  Made png_zalloc() issue a png_warning and return NULL on potential
-    overflow.
-  Turn on PNG_NO_ZALLOC_ZERO by default in version 1.2.x
-  Revised "clobber list" in pnggccrd.c so it will compile under gcc-3.4.
-  Revised Borland portion of png_malloc() to return NULL or issue
-    png_error() according to setting of PNG_FLAG_MALLOC_NULL_MEM_OK.
-  Added PNG_NO_SEQUENTIAL_READ_SUPPORTED macro to conditionally remove
-    sequential read support.
-  Added some "#if PNG_WRITE_SUPPORTED" blocks.
-  Added #ifdef to remove some redundancy in png_malloc_default().
-  Use png_malloc instead of png_zalloc to allocate the palette.
-
-Version 1.0.16rc1 and 1.2.6rc1 [August 4, 2004]
-  Fixed buffer overflow vulnerability (CVE-2004-0597) in png_handle_tRNS().
-  Fixed NULL dereference vulnerability (CVE-2004-0598) in png_handle_iCCP().
-  Fixed integer overflow vulnerability (CVE-2004-0599) in png_read_png().
-  Fixed some harmless bugs in png_handle_sBIT, etc, that would cause
-    duplicate chunk types to go undetected.
-  Fixed some timestamps in the -config version
-  Rearranged order of processing of color types in png_handle_tRNS().
-  Added ROWBYTES macro to calculate rowbytes without integer overflow.
-  Updated makefile.darwin and removed makefile.macosx from scripts directory.
-  Imposed default one million column, one-million row limits on the image
-    dimensions, and added png_set_user_limits() function to override them.
-  Revised use of PNG_SET_USER_LIMITS_SUPPORTED macro.
-  Fixed wrong cast of returns from png_get_user_width|height_max().
-  Changed some "keep the compiler happy" from empty statements to returns,
-  Revised libpng.txt to remove 1.2.x stuff from the 1.0.x distribution
-
-Version 1.0.16rc2 and 1.2.6rc2 [August 7, 2004]
-  Revised makefile.darwin and makefile.solaris.  Removed makefile.macosx.
-  Revised pngtest's png_debug_malloc() to use png_malloc() instead of
-    png_malloc_default() which is not supposed to be exported.
-  Fixed off-by-one error in one of the conversions to PNG_ROWBYTES() in
-    pngpread.c.  Bug was introduced in 1.2.6rc1.
-  Fixed bug in RGB to RGBX transformation introduced in 1.2.6rc1.
-  Fixed old bug in RGB to Gray transformation.
-  Fixed problem with 64-bit compilers by casting arguments to abs()
-    to png_int_32.
-  Changed "ln -sf" to "ln -f -s" in three makefiles (solaris, sco, so9).
-  Changed "HANDLE_CHUNK_*" to "PNG_HANDLE_CHUNK_*" (Cosmin)
-  Added "-@/bin/rm -f $(DL)/$(LIBNAME).so.$(PNGMAJ)" to 15 *NIX makefiles.
-  Added code to update the row_info->colortype in png_do_read_filler() (MSB).
-
-Version 1.0.16rc3 and 1.2.6rc3 [August 9, 2004]
-  Eliminated use of "abs()" in testing cHRM and gAMA values, to avoid
-    trouble with some 64-bit compilers.  Created PNG_OUT_OF_RANGE() macro.
-  Revised documentation of png_set_keep_unknown_chunks().
-  Check handle_as_unknown status in pngpread.c, as in pngread.c previously.
-  Moved  "PNG_HANDLE_CHUNK_*" macros out of PNG_INTERNAL section of png.h
-  Added "rim" definitions for CONST4 and CONST6 in pnggccrd.c
-
-Version 1.0.16rc4 and 1.2.6rc4 [August 10, 2004]
-  Fixed mistake in pngtest.c introduced in 1.2.6rc2 (declaration of
-    "pinfo" was out of place).
-
-Version 1.0.16rc5 and 1.2.6rc5 [August 10, 2004]
-  Moved  "PNG_HANDLE_CHUNK_*" macros out of PNG_ASSEMBLER_CODE_SUPPORTED
-    section of png.h where they were inadvertently placed in version rc3.
-
-Version 1.2.6 and 1.0.16 [August 15, 2004]
-  Revised pngtest so memory allocation testing is only done when PNG_DEBUG==1.
-
-Version 1.2.7beta1 [August 26, 2004]
-  Removed unused pngasmrd.h file.
-  Removed references to uu.net for archived files.  Added references to
-    PNG Spec (second edition) and the PNG ISO/IEC Standard.
-  Added "test-dd" target in 15 makefiles, to run pngtest in DESTDIR.
-  Fixed bug with "optimized window size" in the IDAT datastream, that
-    causes libpng to write PNG files with incorrect zlib header bytes.
-
-Version 1.2.7beta2 [August 28, 2004]
-  Fixed bug with sCAL chunk and big-endian machines (David Munro).
-  Undid new code added in 1.2.6rc2 to update the color_type in
-    png_set_filler().
-  Added png_set_add_alpha() that updates color type.
-
-Version 1.0.17rc1 and 1.2.7rc1 [September 4, 2004]
-  Revised png_set_strip_filler() to not remove alpha if color_type has alpha.
-
-Version 1.2.7 and 1.0.17 [September 12, 2004]
-  Added makefile.hp64
-  Changed projects/msvc/png32ms.def to scripts/png32ms.def in makefile.cygwin
-
-Version 1.2.8beta1 [November 1, 2004]
-  Fixed bug in png_text_compress() that would fail to complete a large block.
-  Fixed bug, introduced in libpng-1.2.7, that overruns a buffer during
-    strip alpha operation in png_do_strip_filler().
-  Added PNG_1_2_X definition in pngconf.h
-  Use #ifdef to comment out png_info_init in png.c and png_read_init in
-    pngread.c (as of 1.3.0)
-
-Version 1.2.8beta2 [November 2, 2004]
-  Reduce color_type to a nonalpha type after strip alpha operation in
-    png_do_strip_filler().
-
-Version 1.2.8beta3 [November 3, 2004]
-  Revised definitions of PNG_MAX_UINT_32, PNG_MAX_SIZE, and PNG_MAXSUM
-
-Version 1.2.8beta4 [November 12, 2004]
-  Fixed (again) definition of PNG_LIBPNG_VER_DLLNUM in png.h (Cosmin).
-  Added PNG_LIBPNG_BUILD_PRIVATE in png.h (Cosmin).
-  Set png_ptr->zstream.data_type to Z_BINARY, to avoid unnecessary detection
-    of data type in deflate (Cosmin).
-  Deprecated but continue to support SPECIALBUILD and PRIVATEBUILD in favor of
-    PNG_LIBPNG_BUILD_SPECIAL_STRING and PNG_LIBPNG_BUILD_PRIVATE_STRING.
-
-Version 1.2.8beta5 [November 20, 2004]
-  Use png_ptr->flags instead of png_ptr->transformations to pass
-    PNG_STRIP_ALPHA info to png_do_strip_filler(), to preserve ABI
-    compatibility.
-  Revised handling of SPECIALBUILD, PRIVATEBUILD,
-    PNG_LIBPNG_BUILD_SPECIAL_STRING and PNG_LIBPNG_BUILD_PRIVATE_STRING.
-
-Version 1.2.8rc1 [November 24, 2004]
-  Moved handling of BUILD macros from pngconf.h to png.h
-  Added definition of PNG_LIBPNG_BASE_TYPE in png.h, inadvertently
-    omitted from beta5.
-  Revised scripts/pngw32.rc
-  Despammed mailing addresses by masking "@" with "at".
-  Inadvertently installed a supposedly faster test version of pngrutil.c
-
-Version 1.2.8rc2 [November 26, 2004]
-  Added two missing "\" in png.h
-  Change tests in pngread.c and pngpread.c to
-    if (png_ptr->transformations || (png_ptr->flags&PNG_FLAG_STRIP_ALPHA))
-       png_do_read_transformations(png_ptr);
-
-Version 1.2.8rc3 [November 28, 2004]
-  Reverted pngrutil.c to version libpng-1.2.8beta5.
-  Added scripts/makefile.elf with supporting code in pngconf.h for symbol
-    versioning (John Bowler).
-
-Version 1.2.8rc4 [November 29, 2004]
-  Added projects/visualc7 (Simon-pierre).
-
-Version 1.2.8rc5 [November 29, 2004]
-  Fixed new typo in scripts/pngw32.rc
-
-Version 1.2.8 [December 3, 2004]
-  Removed projects/visualc7, added projects/visualc71.
-
-Version 1.2.9beta1 [February 21, 2006]
-  Initialized some structure members in pngwutil.c to avoid gcc-4.0.0 complaints
-  Revised man page and libpng.txt to make it clear that one should not call
-    png_read_end or png_write_end after png_read_png or png_write_png.
-  Updated references to png-mng-implement mailing list.
-  Fixed an incorrect typecast in pngrutil.c
-  Added PNG_NO_READ_SUPPORTED conditional for making a write-only library.
-  Added PNG_NO_WRITE_INTERLACING_SUPPORTED conditional.
-  Optimized alpha-inversion loops in pngwtran.c
-  Moved test for nonzero gamma outside of png_build_gamma_table() in pngrtran.c
-  Make sure num_trans is <= 256 before copying data in png_set_tRNS().
-  Make sure num_palette is <= 256 before copying data in png_set_PLTE().
-  Interchanged order of write_swap_alpha and write_invert_alpha transforms.
-  Added parentheses in the definition of PNG_LIBPNG_BUILD_TYPE (Cosmin).
-  Optimized zlib window flag (CINFO) in contrib/pngsuite/*.png (Cosmin).
-  Updated scripts/makefile.bc32 for Borland C++ 5.6 (Cosmin).
-  Exported png_get_uint_32, png_save_uint_32, png_get_uint_16, png_save_uint_16,
-    png_get_int_32, png_save_int_32, png_get_uint_31 (Cosmin).
-  Added type cast (png_byte) in png_write_sCAL() (Cosmin).
-  Fixed scripts/makefile.cygwin (Christian Biesinger, Cosmin).
-  Default iTXt support was inadvertently enabled.
-
-Version 1.2.9beta2 [February 21, 2006]
-  Check for png_rgb_to_gray and png_gray_to_rgb read transformations before
-    checking for png_read_dither in pngrtran.c
-  Revised checking of chromaticity limits to accommodate extended RGB
-    colorspace (John Denker).
-  Changed line endings in some of the project files to CRLF, even in the
-    "Unix" tar distributions (Cosmin).
-  Made png_get_int_32 and png_save_int_32 always available (Cosmin).
-  Updated scripts/pngos2.def, scripts/pngw32.def and projects/wince/png32ce.def
-    with the newly exported functions.
-  Eliminated distributions without the "configure" script.
-  Updated INSTALL instructions.
-
-Version 1.2.9beta3 [February 24, 2006]
-  Fixed CRCRLF line endings in contrib/visupng/VisualPng.dsp
-  Made libpng.pc respect EXEC_PREFIX (D. P. Kreil, J. Bowler)
-  Removed reference to pngasmrd.h from Makefile.am
-  Renamed CHANGES to ChangeLog.
-  Renamed LICENSE to COPYING.
-  Renamed ANNOUNCE to NEWS.
-  Created AUTHORS file.
-
-Version 1.2.9beta4 [March 3, 2006]
-  Changed definition of PKGCONFIG from $prefix/lib to $libdir in configure.ac
-  Reverted to filenames LICENSE and ANNOUNCE; removed AUTHORS and COPYING.
-  Removed newline from the end of some error and warning messages.
-  Removed test for sqrt() from configure.ac and configure.
-  Made swap tables in pngtrans.c PNG_CONST (Carlo Bramix).
-  Disabled default iTXt support that was inadvertently enabled in
-    libpng-1.2.9beta1.
-  Added "OS2" to list of systems that don't need underscores, in pnggccrd.c
-  Removed libpng version and date from *.c files.
-
-Version 1.2.9beta5 [March 4, 2006]
-  Removed trailing blanks from source files.
-  Put version and date of latest change in each source file, and changed
-    copyright year accordingly.
-  More cleanup of configure.ac, Makefile.am, and associated scripts.
-  Restored scripts/makefile.elf which was inadvertently deleted.
-
-Version 1.2.9beta6 [March 6, 2006]
-  Fixed typo (RELEASE) in configuration files.
-
-Version 1.2.9beta7 [March 7, 2006]
-  Removed libpng.vers and libpng.sym from libpng12_la_SOURCES in Makefile.am
-  Fixed inconsistent #ifdef's around png_sig_bytes() and png_set_sCAL_s()
-    in png.h.
-  Updated makefile.elf as suggested by debian.
-  Made cosmetic changes to some makefiles, adding LN_SF and other macros.
-  Made some makefiles accept "exec_prefix".
-
-Version 1.2.9beta8 [March 9, 2006]
-  Fixed some "#if defined (..." which should be "#if defined(..."
-    Bug introduced in libpng-1.2.8.
-  Fixed inconsistency in definition of png_default_read_data()
-  Restored blank that was lost from makefile.sggcc "clean" target in beta7.
-  Revised calculation of "current" and "major" for irix in ltmain.sh
-  Changed "mkdir" to "MKDIR_P" in some makefiles.
-  Separated PNG_EXPAND and PNG_EXPAND_tRNS.
-  Added png_set_expand_gray_1_2_4_to_8() and deprecated
-    png_set_gray_1_2_4_to_8() which also expands tRNS to alpha.
-
-Version 1.2.9beta9 [March 10, 2006]
-  Include "config.h" in pngconf.h when available.
-  Added some checks for NULL png_ptr or NULL info_ptr (timeless)
-
-Version 1.2.9beta10 [March 20, 2006]
-  Removed extra CR from contrib/visualpng/VisualPng.dsw (Cosmin)
-  Made pnggccrd.c PIC-compliant (Christian Aichinger).
-  Added makefile.mingw (Wolfgang Glas).
-  Revised pngconf.h MMX checking.
-
-Version 1.2.9beta11 [March 22, 2006]
-  Fixed out-of-order declaration in pngwrite.c that was introduced in beta9
-  Simplified some makefiles by using LIBSO, LIBSOMAJ, and LIBSOVER macros.
-
-Version 1.2.9rc1 [March 31, 2006]
-  Defined PNG_USER_PRIVATEBUILD when including "pngusr.h" (Cosmin).
-  Removed nonsensical assertion check from pngtest.c (Cosmin).
-
-Version 1.2.9 [April 14, 2006]
-  Revised makefile.beos and added "none" selector in ltmain.sh
-
-Version 1.2.10beta1 [April 15, 2006]
-  Renamed "config.h" to "png_conf.h" and revised Makefile.am to add
-    -DPNG_BUILDING_LIBPNG to compile directive, and modified pngconf.h
-    to include png_conf.h only when PNG_BUILDING_LIBPNG is defined.
-
-Version 1.2.10beta2 [April 15, 2006]
-  Manually updated Makefile.in and configure.  Changed png_conf.h.in
-    back to config.h.
-
-Version 1.2.10beta3 [April 15, 2006]
-  Change png_conf.h back to config.h in pngconf.h.
-
-Version 1.2.10beta4 [April 16, 2006]
-  Change PNG_BUILDING_LIBPNG to PNG_CONFIGURE_LIBPNG in config/Makefile*.
-
-Version 1.2.10beta5 [April 16, 2006]
-  Added a configure check for compiling assembler code in pnggccrd.c
-
-Version 1.2.10beta6 [April 17, 2006]
-  Revised the configure check for pnggccrd.c
-  Moved -DPNG_CONFIGURE_LIBPNG into @LIBPNG_DEFINES@
-  Added @LIBPNG_DEFINES@ to arguments when building libpng.sym
-
-Version 1.2.10beta7 [April 18, 2006]
-  Change "exec_prefix=$prefix" to "exec_prefix=$(prefix)" in makefiles.
-
-Version 1.2.10rc1 [April 19, 2006]
-  Ensure pngconf.h doesn't define both PNG_USE_PNGGCCRD and PNG_USE_PNGVCRD
-  Fixed "LN_FS" typo in makefile.sco and makefile.solaris.
-
-Version 1.2.10rc2 [April 20, 2006]
-  Added a backslash between -DPNG_CONFIGURE_LIBPNG and -DPNG_NO_ASSEMBLER_CODE
-   in configure.ac and configure
-  Made the configure warning about versioned symbols less arrogant.
-
-Version 1.2.10rc3 [April 21, 2006]
-  Added a note in libpng.txt that png_set_sig_bytes(8) can be used when
-    writing an embedded PNG without the 8-byte signature.
-  Revised makefiles and configure to avoid making links to libpng.so.*
-
-Version 1.2.10 [April 23, 2006]
-  Reverted configure to "rc2" state.
-
-Version 1.2.11beta1 [May 31, 2006]
-  scripts/libpng.pc.in contained "configure" style version info and would
-    not work with makefiles.
-  The shared-library makefiles were linking to libpng.so.0 instead of
-    libpng.so.3 compatibility as the library.
-
-Version 1.2.11beta2 [June 2, 2006]
-  Increased sprintf buffer from 50 to 52 chars in pngrutil.c to avoid
-    buffer overflow.
-  Fixed bug in example.c (png_set_palette_rgb -> png_set_palette_to_rgb)
-
-Version 1.2.11beta3 [June 5, 2006]
-  Prepended "#! /bin/sh" to ltmail.sh and contrib/pngminus/*.sh (Cosmin).
-  Removed the accidental leftover Makefile.in~ (Cosmin).
-  Avoided potential buffer overflow and optimized buffer in
-    png_write_sCAL(), png_write_sCAL_s() (Cosmin).
-  Removed the include directories and libraries from CFLAGS and LDFLAGS
-    in scripts/makefile.gcc (Nelson A. de Oliveira, Cosmin).
-
-Version 1.2.11beta4 [June 6, 2006]
-  Allow zero-length IDAT chunks after the entire zlib datastream, but not
-    after another intervening chunk type.
-
-Version 1.0.19rc1, 1.2.11rc1 [June 13, 2006]
-  Deleted extraneous square brackets from [config.h] in configure.ac
-
-Version 1.0.19rc2, 1.2.11rc2 [June 14, 2006]
-  Added prototypes for PNG_INCH_CONVERSIONS functions to png.h
-  Revised INSTALL and autogen.sh
-  Fixed typo in several makefiles (-W1 should be -Wl)
-  Added typedef for png_int_32 and png_uint_32 on 64-bit systems.
-
-Version 1.0.19rc3, 1.2.11rc3 [June 15, 2006]
-  Removed the new typedefs for 64-bit systems (delay until version 1.4.0)
-  Added one zero element to png_gamma_shift[] array in pngrtran.c to avoid
-    reading out of bounds.
-
-Version 1.0.19rc4, 1.2.11rc4 [June 15, 2006]
-  Really removed the new typedefs for 64-bit systems.
-
-Version 1.0.19rc5, 1.2.11rc5 [June 22, 2006]
-  Removed png_sig_bytes entry from scripts/pngw32.def
-
-Version 1.0.19, 1.2.11 [June 26, 2006]
-  None.
-
-Version 1.0.20, 1.2.12 [June 27, 2006]
-  Really increased sprintf buffer from 50 to 52 chars in pngrutil.c to avoid
-    buffer overflow.
-
-Version 1.2.13beta1 [October 2, 2006]
-  Removed AC_FUNC_MALLOC from configure.ac
-  Work around Intel-Mac compiler bug by setting PNG_NO_MMX_CODE in pngconf.h
-  Change "logical" to "bitwise" throughout documentation.
-  Detect and fix attempt to write wrong iCCP profile length (CVE-2006-7244)
-
-Version 1.0.21, 1.2.13 [November 14, 2006]
-  Fix potential buffer overflow in sPLT chunk handler.
-  Fix Makefile.am to not try to link to noexistent files.
-  Check all exported functions for NULL png_ptr.
-
-Version 1.2.14beta1 [November 17, 2006]
-  Relocated three misplaced tests for NULL png_ptr.
-  Built Makefile.in with automake-1.9.6 instead of 1.9.2.
-  Build configure with autoconf-2.60 instead of 2.59
-
-Version 1.2.14beta2 [November 17, 2006]
-  Added some typecasts in png_zalloc().
-
-Version 1.2.14rc1 [November 20, 2006]
-  Changed "strtod" to "png_strtod" in pngrutil.c
-
-Version 1.0.22, 1.2.14    [November 27, 2006]
-  Added missing "$(srcdir)" in Makefile.am and Makefile.in
-
-Version 1.2.15beta1 [December 3, 2006]
-  Generated configure with autoconf-2.61 instead of 2.60
-  Revised configure.ac to update libpng.pc and libpng-config.
-
-Version 1.2.15beta2 [December 3, 2006]
-  Always export MMX asm functions, just stubs if not building pnggccrd.c
-
-Version 1.2.15beta3 [December 4, 2006]
-  Add "png_bytep" typecast to profile while calculating length in pngwutil.c
-
-Version 1.2.15beta4 [December 7, 2006]
-  Added scripts/CMakeLists.txt
-  Changed PNG_NO_ASSEMBLER_CODE to PNG_NO_MMX_CODE in scripts, like 1.4.0beta
-
-Version 1.2.15beta5 [December 7, 2006]
-  Changed some instances of PNG_ASSEMBLER_* to PNG_MMX_* in pnggccrd.c
-  Revised scripts/CMakeLists.txt
-
-Version 1.2.15beta6 [December 13, 2006]
-  Revised scripts/CMakeLists.txt and configure.ac
-
-Version 1.2.15rc1 [December 18, 2006]
-  Revised scripts/CMakeLists.txt
-
-Version 1.2.15rc2 [December 21, 2006]
-  Added conditional #undef jmpbuf in pngtest.c to undo #define in AIX headers.
-  Added scripts/makefile.nommx
-
-Version 1.2.15rc3 [December 25, 2006]
-  Fixed shared library numbering error that was introduced in 1.2.15beta6.
-
-Version 1.2.15rc4 [December 27, 2006]
-  Fixed handling of rgb_to_gray when png_ptr->color.gray isn't set.
-
-Version 1.2.15rc5 [December 31, 2006]
-  Revised handling of rgb_to_gray.
-
-Version 1.2.15 [January 5, 2007]
-  Added some (unsigned long) typecasts in pngtest.c to avoid printing errors.
-
-Version 1.2.16beta1 [January 6, 2007]
-  Fix bugs in makefile.nommx
-
-Version 1.2.16beta2 [January 16, 2007]
-  Revised scripts/CMakeLists.txt
-
-Version 1.2.16 [January 31, 2007]
-  No changes.
-
-Version 1.2.17beta1 [March 6, 2007]
-  Revised scripts/CMakeLists.txt to install both shared and static libraries.
-  Deleted a redundant line from pngset.c.
-
-Version 1.2.17beta2 [April 26, 2007]
-  Relocated misplaced test for png_ptr == NULL in pngpread.c
-  Change "==" to "&" for testing PNG_RGB_TO_GRAY_ERR & PNG_RGB_TO_GRAY_WARN
-    flags.
-  Changed remaining instances of PNG_ASSEMBLER_* to PNG_MMX_*
-  Added pngerror() when write_IHDR fails in deflateInit2().
-  Added "const" to some array declarations.
-  Mention examples of libpng usage in the libpng*.txt and libpng.3 documents.
-
-Version 1.2.17rc1 [May 4, 2007]
-  No changes.
-
-Version 1.2.17rc2 [May 8, 2007]
-  Moved several PNG_HAVE_* macros out of PNG_INTERNAL because applications
-    calling set_unknown_chunk_location() need them.
-  Changed transformation flag from PNG_EXPAND_tRNS to PNG_EXPAND in
-    png_set_expand_gray_1_2_4_to_8().
-  Added png_ptr->unknown_chunk to hold working unknown chunk data, so it
-    can be free'ed in case of error.  Revised unknown chunk handling in
-    pngrutil.c and pngpread.c to use this structure.
-
-Version 1.2.17rc3 [May 8, 2007]
-  Revised symbol-handling in configure script.
-
-Version 1.2.17rc4 [May 10, 2007]
-  Revised unknown chunk handling to avoid storing unknown critical chunks.
-
-Version 1.0.25 [May 15, 2007]
-Version 1.2.17 [May 15, 2007]
-  Added "png_ptr->num_trans=0" before error return in png_handle_tRNS,
-    to eliminate a vulnerability (CVE-2007-2445, CERT VU#684664)
-
-Version 1.0.26 [May 15, 2007]
-Version 1.2.18 [May 15, 2007]
-  Reverted the libpng-1.2.17rc3 change to symbol-handling in configure script
-
-Version 1.2.19beta1 [May 18, 2007]
-  Changed "const static" to "static PNG_CONST" everywhere, mostly undoing
-    change of libpng-1.2.17beta2.  Changed other "const" to "PNG_CONST"
-  Changed some handling of unused parameters, to avoid compiler warnings.
-    "if (unused == NULL) return;" becomes "unused = unused".
-
-Version 1.2.19beta2 [May 18, 2007]
-  Only use the valid bits of tRNS value in png_do_expand() (Brian Cartier)
-
-Version 1.2.19beta3 [May 19, 2007]
-  Add some "png_byte" typecasts in png_check_keyword() and write new_key
-  instead of key in zTXt chunk (Kevin Ryde).
-
-Version 1.2.19beta4 [May 21, 2007]
-  Add png_snprintf() function and use it in place of sprint() for improved
-    defense against buffer overflows.
-
-Version 1.2.19beta5 [May 21, 2007]
-  Fixed png_handle_tRNS() to only use the valid bits of tRNS value.
-  Changed handling of more unused parameters, to avoid compiler warnings.
-  Removed some PNG_CONST in pngwutil.c to avoid compiler warnings.
-
-Version 1.2.19beta6 [May 22, 2007]
-  Added some #ifdef PNG_MMX_CODE_SUPPORTED where needed in pngvcrd.c
-  Added a special "_MSC_VER" case that defines png_snprintf to _snprintf
-
-Version 1.2.19beta7 [May 22, 2007]
-  Squelched png_squelch_warnings() in pnggccrd.c and added
-    an #ifdef PNG_MMX_CODE_SUPPORTED block around the declarations that caused
-    the warnings that png_squelch_warnings was squelching.
-
-Version 1.2.19beta8 [May 22, 2007]
-  Removed __MMX__ from test in pngconf.h.
-
-Version 1.2.19beta9 [May 23, 2007]
-  Made png_squelch_warnings() available via PNG_SQUELCH_WARNINGS macro.
-  Revised png_squelch_warnings() so it might work.
-  Updated makefile.sgcc and makefile.solaris; added makefile.solaris-x86.
-
-Version 1.2.19beta10 [May 24, 2007]
-  Resquelched png_squelch_warnings(), use "__attribute__((used))" instead.
-
-Version 1.4.0beta1 [April 20, 2006]
-  Enabled iTXt support (changes png_struct, thus requires so-number change).
-  Cleaned up PNG_ASSEMBLER_CODE_SUPPORTED vs PNG_MMX_CODE_SUPPORTED
-  Eliminated PNG_1_0_X and PNG_1_2_X macros.
-  Removed deprecated functions png_read_init, png_write_init, png_info_init,
-    png_permit_empty_plte, png_set_gray_1_2_4_to_8, png_check_sig, and
-    removed the deprecated macro PNG_MAX_UINT.
-  Moved "PNG_INTERNAL" parts of png.h and pngconf.h into pngintrn.h
-  Removed many WIN32_WCE #ifdefs (Cosmin).
-  Reduced dependency on C-runtime library when on Windows (Simon-Pierre)
-  Replaced sprintf() with png_sprintf() (Simon-Pierre)
-
-Version 1.4.0beta2 [April 20, 2006]
-  Revised makefiles and configure to avoid making links to libpng.so.*
-  Moved some leftover MMX-related defines from pngconf.h to pngintrn.h
-  Updated scripts/pngos2.def, pngw32.def, and projects/wince/png32ce.def
-
-Version 1.4.0beta3 [May 10, 2006]
-  Updated scripts/pngw32.def to comment out MMX functions.
-  Added PNG_NO_GET_INT_32 and PNG_NO_SAVE_INT_32 macros.
-  Scripts/libpng.pc.in contained "configure" style version info and would
-    not work with makefiles.
-  Revised pngconf.h and added pngconf.h.in, so makefiles and configure can
-    pass defines to libpng and applications.
-
-Version 1.4.0beta4 [May 11, 2006]
-  Revised configure.ac, Makefile.am, and many of the makefiles to write
-    their defines in pngconf.h.
-
-Version 1.4.0beta5 [May 15, 2006]
-  Added a missing semicolon in Makefile.am and Makefile.in
-  Deleted extraneous square brackets from configure.ac
-
-Version 1.4.0beta6 [June 2, 2006]
-  Increased sprintf buffer from 50 to 52 chars in pngrutil.c to avoid
-    buffer overflow.
-  Changed sonum from 0 to 1.
-  Removed unused prototype for png_check_sig() from png.h
-
-Version 1.4.0beta7 [June 16, 2006]
-  Exported png_write_sig (Cosmin).
-  Optimized buffer in png_handle_cHRM() (Cosmin).
-  Set pHYs = 2835 x 2835 pixels per meter, and added
-    sCAL = 0.352778e-3 x 0.352778e-3 meters, in pngtest.png (Cosmin).
-  Added png_set_benign_errors(), png_benign_error(), png_chunk_benign_error().
-  Added typedef for png_int_32 and png_uint_32 on 64-bit systems.
-  Added "(unsigned long)" typecast on png_uint_32 variables in printf lists.
-
-Version 1.4.0beta8 [June 22, 2006]
-  Added demonstration of user chunk support in pngtest.c, to support the
-    public sTER chunk and a private vpAg chunk.
-
-Version 1.4.0beta9 [July 3, 2006]
-  Removed ordinals from scripts/pngw32.def and removed png_info_int and
-    png_set_gray_1_2_4_to_8 entries.
-  Inline call of png_get_uint_32() in png_get_uint_31().
-  Use png_get_uint_31() to get vpAg width and height in pngtest.c
-  Removed WINCE and Netware projects.
-  Removed standalone Y2KINFO file.
-
-Version 1.4.0beta10 [July 12, 2006]
-  Eliminated automatic copy of pngconf.h to pngconf.h.in from configure and
-    some makefiles, because it was not working reliably.  Instead, distribute
-    pngconf.h.in along with pngconf.h and cause configure and some of the
-    makefiles to update pngconf.h from pngconf.h.in.
-  Added pngconf.h to DEPENDENCIES in Makefile.am
-
-Version 1.4.0beta11 [August 19, 2006]
-  Removed AC_FUNC_MALLOC from configure.ac.
-  Added a warning when writing iCCP profile with mismatched profile length.
-  Patched pnggccrd.c to assemble on x86_64 platforms.
-  Moved chunk header reading into a separate function png_read_chunk_header()
-    in pngrutil.c.  The chunk header (len+sig) is now serialized in a single
-    operation (Cosmin).
-  Implemented support for I/O states. Added png_ptr member io_state, and
-    functions png_get_io_chunk_name() and png_get_io_state() in pngget.c
-    (Cosmin).
-  Added png_get_io_chunk_name and png_get_io_state to scripts/*.def (Cosmin).
-  Renamed scripts/pngw32.* to scripts/pngwin.* (Cosmin).
-  Removed the include directories and libraries from CFLAGS and LDFLAGS
-    in scripts/makefile.gcc (Cosmin).
-  Used png_save_uint_32() to set vpAg width and height in pngtest.c (Cosmin).
-  Cast to proper type when getting/setting vpAg units in pngtest.c (Cosmin).
-  Added pngintrn.h to the Visual C++ projects (Cosmin).
-  Removed scripts/list (Cosmin).
-  Updated copyright year in scripts/pngwin.def (Cosmin).
-  Removed PNG_TYPECAST_NULL and used standard NULL consistently (Cosmin).
-  Disallowed the user to redefine png_size_t, and enforced a consistent use
-    of png_size_t across libpng (Cosmin).
-  Changed the type of png_ptr->rowbytes, PNG_ROWBYTES() and friends
-    to png_size_t (Cosmin).
-  Removed png_convert_size() and replaced png_sizeof with sizeof (Cosmin).
-  Removed some unnecessary type casts (Cosmin).
-  Changed prototype of png_get_compression_buffer_size() and
-    png_set_compression_buffer_size() to work with png_size_t instead of
-    png_uint_32 (Cosmin).
-  Removed png_memcpy_check() and png_memset_check() (Cosmin).
-  Fixed a typo (png_byte --> png_bytep) in libpng.3 and libpng.txt (Cosmin).
-  Clarified that png_zalloc() does not clear the allocated memory,
-    and png_zalloc() and png_zfree() cannot be PNGAPI (Cosmin).
-  Renamed png_mem_size_t to png_alloc_size_t, fixed its definition in
-    pngconf.h, and used it in all memory allocation functions (Cosmin).
-  Renamed pngintrn.h to pngpriv.h, added a comment at the top of the file
-    mentioning that the symbols declared in that file are private, and
-    updated the scripts and the Visual C++ projects accordingly (Cosmin).
-  Removed circular references between pngconf.h and pngconf.h.in in
-    scripts/makefile.vc*win32 (Cosmin).
-  Removing trailing '.' from the warning and error messages (Cosmin).
-  Added pngdefs.h that is built by makefile or configure, instead of
-    pngconf.h.in (Glenn).
-  Detect and fix attempt to write wrong iCCP profile length.
-
-Version 1.4.0beta12 [October 19, 2006]
-  Changed "logical" to "bitwise" in the documentation.
-  Work around Intel-Mac compiler bug by setting PNG_NO_MMX_CODE in pngconf.h
-  Add a typecast to stifle compiler warning in pngrutil.c
-
-Version 1.4.0beta13 [November 10, 2006]
-  Fix potential buffer overflow in sPLT chunk handler.
-  Fix Makefile.am to not try to link to noexistent files.
-
-Version 1.4.0beta14 [November 15, 2006]
-  Check all exported functions for NULL png_ptr.
-
-Version 1.4.0beta15 [November 17, 2006]
-  Relocated two misplaced tests for NULL png_ptr.
-  Built Makefile.in with automake-1.9.6 instead of 1.9.2.
-  Build configure with autoconf-2.60 instead of 2.59
-  Add "install: all" in Makefile.am so "configure; make install" will work.
-
-Version 1.4.0beta16 [November 17, 2006]
-  Added a typecast in png_zalloc().
-
-Version 1.4.0beta17 [December 4, 2006]
-  Changed "new_key[79] = '\0';" to "(*new_key)[79] = '\0';" in pngwutil.c
-  Add "png_bytep" typecast to profile while calculating length in pngwutil.c
-
-Version 1.4.0beta18 [December 7, 2006]
-  Added scripts/CMakeLists.txt
-
-Version 1.4.0beta19 [May 16, 2007]
-  Revised scripts/CMakeLists.txt
-  Rebuilt configure and Makefile.in with newer tools.
-  Added conditional #undef jmpbuf in pngtest.c to undo #define in AIX headers.
-  Added scripts/makefile.nommx
-
-Version 1.4.0beta20 [July 9, 2008]
-  Moved several PNG_HAVE_* macros from pngpriv.h to png.h because applications
-    calling set_unknown_chunk_location() need them.
-  Moved several macro definitions from pngpriv.h to pngconf.h
-  Merge with changes to the 1.2.X branch, as of 1.2.30beta04.
-  Deleted all use of the MMX assembler code and Intel-licensed optimizations.
-  Revised makefile.mingw
-
-Version 1.4.0beta21 [July 21, 2008]
-  Moved local array "chunkdata" from pngrutil.c to the png_struct, so
-    it will be freed by png_read_destroy() in case of a read error (Kurt
-    Christensen).
-
-Version 1.4.0beta22 [July 21, 2008]
-  Change "purpose" and "buffer" to png_ptr->chunkdata to avoid memory leaking.
-
-Version 1.4.0beta23 [July 22, 2008]
-  Change "chunkdata = NULL" to "png_ptr->chunkdata = NULL" several places in
-    png_decompress_chunk().
-
-Version 1.4.0beta24 [July 25, 2008]
-  Change all remaining "chunkdata" to "png_ptr->chunkdata" in
-    png_decompress_chunk(), and remove "chunkdata" from parameter list.
-  Put a call to png_check_chunk_name() in png_read_chunk_header().
-  Revised png_check_chunk_name() to reject a name with a lowercase 3rd byte.
-  Removed two calls to png_check_chunk_name() occurring later in the process.
-  Define PNG_NO_ERROR_NUMBERS by default in pngconf.h
-
-Version 1.4.0beta25 [July 30, 2008]
-  Added a call to png_check_chunk_name() in pngpread.c
-  Reverted png_check_chunk_name() to accept a name with a lowercase 3rd byte.
-  Added png_push_have_buffer() function to pngpread.c
-  Eliminated PNG_BIG_ENDIAN_SUPPORTED and associated png_get_* macros.
-  Made inline expansion of png_get_*() optional with PNG_USE_READ_MACROS.
-  Eliminated all PNG_USELESS_TESTS and PNG_CORRECT_PALETTE_SUPPORTED code.
-  Synced contrib directory and configure files with libpng-1.2.30beta06.
-  Eliminated no-longer-used pngdefs.h (but it's still built in the makefiles)
-  Relocated a misplaced "#endif /* PNG_NO_WRITE_FILTER */" in pngwutil.c
-
-Version 1.4.0beta26 [August 4, 2008]
-  Removed png_push_have_buffer() function in pngpread.c.  It increased the
-    compiled library size slightly.
-  Changed "-Wall" to "-W -Wall" in the CFLAGS in all makefiles (Cosmin Truta)
-  Declared png_ptr "volatile" in pngread.c and pngwrite.c to avoid warnings.
-  Updated contrib/visupng/cexcept.h to version 2.0.1
-  Added PNG_LITERAL_CHARACTER macros for #, [, and ].
-
-Version 1.4.0beta27 [August 5, 2008]
-  Revised usage of PNG_LITERAL_SHARP in pngerror.c.
-  Moved newline character from individual png_debug messages into the
-    png_debug macros.
-  Allow user to #define their own png_debug, png_debug1, and png_debug2.
-
-Version 1.4.0beta28 [August 5, 2008]
-  Revised usage of PNG_LITERAL_SHARP in pngerror.c.
-  Added PNG_STRING_NEWLINE macro
-
-Version 1.4.0beta29 [August 9, 2008]
-  Revised usage of PNG_STRING_NEWLINE to work on non-ISO compilers.
-  Added PNG_STRING_COPYRIGHT macro.
-  Added non-ISO versions of png_debug macros.
-
-Version 1.4.0beta30 [August 14, 2008]
-  Added premultiplied alpha feature (Volker Wiendl).
-
-Version 1.4.0beta31 [August 18, 2008]
-  Moved png_set_premultiply_alpha from pngtrans.c to pngrtran.c
-  Removed extra crc check at the end of png_handle_cHRM().  Bug introduced
-    in libpng-1.4.0beta20.
-
-Version 1.4.0beta32 [August 19, 2008]
-  Added PNG_WRITE_FLUSH_SUPPORTED block around new png_flush() call.
-  Revised PNG_NO_STDIO version of png_write_flush()
-
-Version 1.4.0beta33 [August 20, 2008]
-  Added png_get|set_chunk_cache_max() to limit the total number of sPLT,
-    text, and unknown chunks that can be stored.
-
-Version 1.4.0beta34 [September 6, 2008]
-  Shortened tIME_string to 29 bytes in pngtest.c
-  Fixed off-by-one error introduced in png_push_read_zTXt() function in
-    libpng-1.2.30beta04/pngpread.c (Harald van Dijk)
-
-Version 1.4.0beta35 [October 6, 2008]
-  Changed "trans_values" to "trans_color".
-  Changed so-number from 0 to 14.  Some OS do not like 0.
-  Revised makefile.darwin to fix shared library numbering.
-  Change png_set_gray_1_2_4_to_8() to png_set_expand_gray_1_2_4_to_8()
-    in example.c (debian bug report)
-
-Version 1.4.0beta36 [October 25, 2008]
-  Sync with tEXt vulnerability fix in libpng-1.2.33rc02.
-
-Version 1.4.0beta37 [November 13, 2008]
-  Added png_check_cHRM in png.c and moved checking from pngget.c, pngrutil.c,
-    and pngwrite.c
-
-Version 1.4.0beta38 [November 22, 2008]
-  Added check for zero-area RGB cHRM triangle in png_check_cHRM() and
-    png_check_cHRM_fixed().
-
-Version 1.4.0beta39 [November 23, 2008]
-  Revised png_warning() to write its message on standard output by default
-    when warning_fn is NULL.
-
-Version 1.4.0beta40 [November 24, 2008]
-  Eliminated png_check_cHRM().  Instead, always use png_check_cHRM_fixed().
-  In png_check_cHRM_fixed(), ensure white_y is > 0, and removed redundant
-    check for all-zero coordinates that is detected by the triangle check.
-
-Version 1.4.0beta41 [November 26, 2008]
-  Fixed string vs pointer-to-string error in png_check_keyword().
-  Rearranged test expressions in png_check_cHRM_fixed() to avoid internal
-    overflows.
-  Added PNG_NO_CHECK_cHRM conditional.
-
-Version 1.4.0beta42, 43 [December 1, 2008]
-  Merge png_debug with version 1.2.34beta04.
-
-Version 1.4.0beta44 [December 6, 2008]
-  Removed redundant check for key==NULL before calling png_check_keyword()
-    to ensure that new_key gets initialized and removed extra warning
-    (Merge with version 1.2.34beta05 -- Arvan Pritchard).
-
-Version 1.4.0beta45 [December 9, 2008]
-  In png_write_png(), respect the placement of the filler bytes in an earlier
-    call to png_set_filler() (Jim Barry).
-
-Version 1.4.0beta46 [December 10, 2008]
-  Undid previous change and added PNG_TRANSFORM_STRIP_FILLER_BEFORE and
-    PNG_TRANSFORM_STRIP_FILLER_AFTER conditionals and deprecated
-    PNG_TRANSFORM_STRIP_FILLER (Jim Barry).
-
-Version 1.4.0beta47 [December 15, 2008]
-  Support for dithering was disabled by default, because it has never
-    been well tested and doesn't work very well.  The code has not
-    been removed, however, and can be enabled by building libpng with
-    PNG_READ_DITHER_SUPPORTED defined.
-
-Version 1.4.0beta48 [February 14, 2009]
-  Added new exported function png_calloc().
-  Combined several instances of png_malloc(); png_memset() into png_calloc().
-  Removed prototype for png_freeptr() that was added in libpng-1.4.0beta24
-    but was never defined.
-
-Version 1.4.0beta49 [February 28, 2009]
-  Added png_fileno() macro to pngconf.h, used in pngwio.c
-  Corrected order of #ifdef's in png_debug definition in png.h
-  Fixed bug introduced in libpng-1.4.0beta48 with the memset arguments
-    for pcal_params.
-  Fixed order of #ifdef directives in the png_debug defines in png.h
-    (bug introduced in libpng-1.2.34/1.4.0beta29).
-  Revised comments in png_set_read_fn() and png_set_write_fn().
-
-Version 1.4.0beta50 [March 18, 2009]
-  Use png_calloc() instead of png_malloc() to allocate big_row_buf when
-    reading an interlaced file, to avoid a possible UMR.
-  Undid revision of PNG_NO_STDIO version of png_write_flush().  Users
-    having trouble with fflush() can build with PNG_NO_WRITE_FLUSH defined
-    or supply their own flush_fn() replacement.
-  Revised libpng*.txt and png.h documentation about use of png_write_flush()
-    and png_set_write_fn().
-  Removed fflush() from pngtest.c.
-  Added "#define PNG_NO_WRITE_FLUSH" to contrib/pngminim/encoder/pngusr.h
-
-Version 1.4.0beta51 [March 21, 2009]
-  Removed new png_fileno() macro from pngconf.h .
-
-Version 1.4.0beta52 [March 27, 2009]
-  Relocated png_do_chop() ahead of building gamma tables in pngrtran.c
-    This avoids building 16-bit gamma tables unnecessarily.
-  Removed fflush() from pngtest.c.
-  Added "#define PNG_NO_WRITE_FLUSH" to contrib/pngminim/encoder/pngusr.h
-  Added a section on differences between 1.0.x and 1.2.x to libpng.3/libpng.txt
-
-Version 1.4.0beta53 [April 1, 2009]
-  Removed some remaining MMX macros from pngpriv.h
-  Fixed potential memory leak of "new_name" in png_write_iCCP() (Ralph Giles)
-
-Version 1.4.0beta54 [April 13, 2009]
-  Added "ifndef PNG_SKIP_SETJMP_CHECK" block in pngconf.h to allow
-    application code writers to bypass the check for multiple inclusion
-    of setjmp.h when they know that it is safe to ignore the situation.
-  Eliminated internal use of setjmp() in pngread.c and pngwrite.c
-  Reordered ancillary chunks in pngtest.png to be the same as what
-    pngtest now produces, and made some cosmetic changes to pngtest output.
-  Eliminated deprecated png_read_init_3() and png_write_init_3() functions.
-
-Version 1.4.0beta55 [April 15, 2009]
-  Simplified error handling in pngread.c and pngwrite.c by putting
-    the new png_read_cleanup() and png_write_cleanup() functions inline.
-
-Version 1.4.0beta56 [April 25, 2009]
-  Renamed "user_chunk_data" to "my_user_chunk_data" in pngtest.c to suppress
-    "shadowed declaration" warning from gcc-4.3.3.
-  Renamed "gamma" to "png_gamma" in pngset.c to avoid "shadowed declaration"
-    warning about a global "gamma" variable in math.h on some platforms.
-
-Version 1.4.0beta57 [May 2, 2009]
-  Removed prototype for png_freeptr() that was added in libpng-1.4.0beta24
-    but was never defined (again).
-  Rebuilt configure scripts with autoconf-2.63 instead of 2.62
-  Removed pngprefs.h and MMX from makefiles
-
-Version 1.4.0beta58 [May 14, 2009]
-  Changed pngw32.def to pngwin.def in makefile.mingw (typo was introduced
-    in beta57).
-  Clarified usage of sig_bit versus sig_bit_p in example.c (Vincent Torri)
-
-Version 1.4.0beta59 [May 15, 2009]
-  Reformatted sources in libpng style (3-space indentation, comment format)
-  Fixed typo in libpng docs (PNG_FILTER_AVE should be PNG_FILTER_AVG)
-  Added sections about the git repository and our coding style to the
-    documentation
-  Relocated misplaced #endif in pngwrite.c, sCAL chunk handler.
-
-Version 1.4.0beta60 [May 19, 2009]
-  Conditionally compile png_read_finish_row() which is not used by
-    progressive readers.
-  Added contrib/pngminim/preader to demonstrate building minimal progressive
-    decoder, based on contrib/gregbook with embedded libpng and zlib.
-
-Version 1.4.0beta61 [May 20, 2009]
-  In contrib/pngminim/*, renamed "makefile.std" to "makefile", since there
-    is only one makefile in those directories, and revised the README files
-    accordingly.
-  More reformatting of comments, mostly to capitalize sentences.
-
-Version 1.4.0beta62 [June 2, 2009]
-  Added "#define PNG_NO_WRITE_SWAP" to contrib/pngminim/encoder/pngusr.h
-    and "define PNG_NO_READ_SWAP" to decoder/pngusr.h and preader/pngusr.h
-  Reformatted several remaining "else statement" into two lines.
-  Added a section to the libpng documentation about using png_get_io_ptr()
-    in configure scripts to detect the presence of libpng.
-
-Version 1.4.0beta63 [June 15, 2009]
-  Revised libpng*.txt and libpng.3 to mention calling png_set_IHDR()
-    multiple times and to specify the sample order in the tRNS chunk,
-    because the ISO PNG specification has a typo in the tRNS table.
-  Changed several PNG_UNKNOWN_CHUNK_SUPPORTED to
-    PNG_HANDLE_AS_UNKNOWN_SUPPORTED, to make the png_set_keep mechanism
-    available for ignoring known chunks even when not saving unknown chunks.
-  Adopted preference for consistent use of "#ifdef" and "#ifndef" versus
-    "#if defined()" and "if !defined()" where possible.
-
-Version 1.4.0beta64 [June 24, 2009]
-  Eliminated PNG_LEGACY_SUPPORTED code.
-  Moved the various unknown chunk macro definitions outside of the
-    PNG_READ|WRITE_ANCILLARY_CHUNK_SUPPORTED blocks.
-
-Version 1.4.0beta65 [June 26, 2009]
-  Added a reference to the libpng license in each file.
-
-Version 1.4.0beta66 [June 27, 2009]
-  Refer to the libpng license instead of the libpng license in each file.
-
-Version 1.4.0beta67 [July 6, 2009]
-  Relocated INVERT_ALPHA within png_read_png() and png_write_png().
-  Added high-level API transform PNG_TRANSFORM_GRAY_TO_RGB.
-  Added an "xcode" project to the projects directory (Alam Arias).
-
-Version 1.4.0beta68 [July 19, 2009]
-  Avoid some tests in filter selection in pngwutil.c
-
-Version 1.4.0beta69 [July 25, 2009]
-  Simplified the new filter-selection test.  This runs faster in the
-    common "PNG_ALL_FILTERS" and PNG_FILTER_NONE cases.
-  Removed extraneous declaration from the new call to png_read_gray_to_rgb()
-    (bug introduced in libpng-1.4.0beta67).
-  Fixed up xcode project (Alam Arias)
-  Added a prototype for png_64bit_product() in png.c
-
-Version 1.4.0beta70 [July 27, 2009]
-  Avoid a possible NULL dereference in debug build, in png_set_text_2().
-    (bug introduced in libpng-0.95, discovered by Evan Rouault)
-
-Version 1.4.0beta71 [July 29, 2009]
-  Rebuilt configure scripts with autoconf-2.64.
-
-Version 1.4.0beta72 [August 1, 2009]
-  Replaced *.tar.lzma with *.tar.xz in distribution.  Get the xz codec
-    from <http://tukaani.org/xz>.
-
-Version 1.4.0beta73 [August 1, 2009]
-  Reject attempt to write iCCP chunk with negative embedded profile length
-    (JD Chen) (CVE-2009-5063).
-
-Version 1.4.0beta74 [August 8, 2009]
-  Changed png_ptr and info_ptr member "trans" to "trans_alpha".
-
-Version 1.4.0beta75 [August 21, 2009]
-  Removed an extra png_debug() recently added to png_write_find_filter().
-  Fixed incorrect #ifdef in pngset.c regarding unknown chunk support.
-
-Version 1.4.0beta76 [August 22, 2009]
-  Moved an incorrectly located test in png_read_row() in pngread.c
-
-Version 1.4.0beta77 [August 27, 2009]
-  Removed lpXYZ.tar.bz2 (with CRLF), KNOWNBUG, libpng-x.y.z-KNOWNBUG.txt,
-    and the "noconfig" files from the distribution.
-  Moved CMakeLists.txt from scripts into the main libpng directory.
-  Various bugfixes and improvements to CMakeLists.txt (Philip Lowman)
-
-Version 1.4.0beta78 [August 31, 2009]
-  Converted all PNG_NO_* tests to PNG_*_SUPPORTED everywhere except pngconf.h
-  Eliminated PNG_NO_FREE_ME and PNG_FREE_ME_SUPPORTED macros.
-  Use png_malloc plus a loop instead of png_calloc() to initialize
-    row_pointers in png_read_png().
-
-Version 1.4.0beta79 [September 1, 2009]
-  Eliminated PNG_GLOBAL_ARRAYS and PNG_LOCAL_ARRAYS; always use local arrays.
-  Eliminated PNG_CALLOC_SUPPORTED macro and always provide png_calloc().
-
-Version 1.4.0beta80 [September 17, 2009]
-  Removed scripts/libpng.icc
-  Changed typecast of filler from png_byte to png_uint_16 in png_set_filler().
-    (Dennis Gustafsson)
-  Fixed typo introduced in beta78 in pngtest.c ("#if def " should be "#ifdef ")
-
-Version 1.4.0beta81 [September 23, 2009]
-  Eliminated unused PNG_FLAG_FREE_* defines from pngpriv.h
-  Expanded TAB characters in pngrtran.c
-  Removed PNG_CONST from all "PNG_CONST PNG_CHNK" declarations to avoid
-    compiler complaints about doubly declaring things "const".
-  Changed all "#if [!]defined(X)" to "if[n]def X" where possible.
-  Eliminated unused png_ptr->row_buf_size
-
-Version 1.4.0beta82 [September 25, 2009]
-  Moved redundant IHDR checking into new png_check_IHDR() in png.c
-    and report all errors found in the IHDR data.
-  Eliminated useless call to png_check_cHRM() from pngset.c
-
-Version 1.4.0beta83 [September 25, 2009]
-  Revised png_check_IHDR() to eliminate bogus complaint about filter_type.
-
-Version 1.4.0beta84 [September 30, 2009]
-  Fixed some inconsistent indentation in pngconf.h
-  Revised png_check_IHDR() to add a test for width variable less than 32-bit.
-
-Version 1.4.0beta85 [October 1, 2009]
-  Revised png_check_IHDR() again, to check info_ptr members instead of
-    the contents of the returned parameters.
-
-Version 1.4.0beta86 [October 9, 2009]
-  Updated the "xcode" project (Alam Arias).
-  Eliminated a shadowed declaration of "pp" in png_handle_sPLT().
-
-Version 1.4.0rc01 [October 19, 2009]
-  Trivial cosmetic changes.
-
-Version 1.4.0beta87 [October 30, 2009]
-  Moved version 1.4.0 back into beta.
-
-Version 1.4.0beta88 [October 30, 2009]
-  Revised libpng*.txt section about differences between 1.2.x and 1.4.0
-    because most of the new features have now been ported back to 1.2.41
-
-Version 1.4.0beta89 [November 1, 2009]
-  More bugfixes and improvements to CMakeLists.txt (Philip Lowman)
-  Removed a harmless extra png_set_invert_alpha() from pngwrite.c
-  Apply png_user_chunk_cache_max within png_decompress_chunk().
-  Merged libpng-1.2.41.txt with libpng-1.4.0.txt where appropriate.
-
-Version 1.4.0beta90 [November 2, 2009]
-  Removed all remaining WIN32_WCE #ifdefs except those involving the
-    time.h "tm" structure
-
-Version 1.4.0beta91 [November 3, 2009]
-  Updated scripts/pngw32.def and projects/wince/png32ce.def
-  Copied projects/wince/png32ce.def to the scripts directory.
-  Added scripts/makefile.wce
-  Patched ltmain.sh for wince support.
-  Added PNG_CONVERT_tIME_SUPPORTED macro.
-
-Version 1.4.0beta92 [November 4, 2009]
-  Make inclusion of time.h in pngconf.h depend on PNG_CONVERT_tIME_SUPPORTED
-  Make #define PNG_CONVERT_tIME_SUPPORTED depend on PNG_WRITE_tIME_SUPPORTED
-  Revised libpng*.txt to describe differences from 1.2.40 to 1.4.0 (instead
-    of differences from 1.2.41 to 1.4.0)
-
-Version 1.4.0beta93 [November 7, 2009]
-  Added PNG_DEPSTRUCT, PNG_DEPRECATED, PNG_USE_RESULT, PNG_NORETURN, and
-    PNG_ALLOCATED macros to detect deprecated direct access to the
-    png_struct or info_struct members and other deprecated usage in
-    applications (John Bowler).
-  Updated scripts/makefile* to add "-DPNG_CONFIGURE_LIBPNG" to CFLAGS,
-    to prevent warnings about direct access to png structs by libpng
-    functions while building libpng.  They need to be tested, especially
-    those using compilers other than gcc.
-  Updated projects/visualc6 and visualc71 with "/d PNG_CONFIGURE_LIBPNG".
-    They should work but still need to be updated to remove
-    references to pnggccrd.c or pngvcrd.c and ASM building.
-  Added README.txt to the beos, cbuilder5, netware, and xcode projects warning
-    that they need to be updated, to remove references to pnggccrd.c and
-    pngvcrd.c and to depend on pngpriv.h
-  Removed three direct references to read_info_ptr members in pngtest.c
-    that were detected by the new PNG_DEPSTRUCT macro.
-  Moved the png_debug macro definitions and the png_read_destroy(),
-    png_write_destroy() and png_far_to_near() prototypes from png.h
-    to pngpriv.h (John Bowler)
-  Moved the synopsis lines for png_read_destroy(), png_write_destroy()
-    png_debug(), png_debug1(), and png_debug2() from libpng.3 to libpngpf.3.
-
-Version 1.4.0beta94 [November 9, 2009]
-  Removed the obsolete, unused pnggccrd.c and pngvcrd.c files.
-  Updated CMakeLists.txt to add "-DPNG_CONFIGURE_LIBPNG" to the definitions.
-  Removed dependency of pngtest.o on pngpriv.h in the makefiles.
-  Only #define PNG_DEPSTRUCT, etc. in pngconf.h if not already defined.
-
-Version 1.4.0beta95 [November 10, 2009]
-  Changed png_check_sig() to !png_sig_cmp() in contrib programs.
-  Added -DPNG_CONFIGURE_LIBPNG to contrib/pngminm/*/makefile
-  Changed png_check_sig() to !png_sig_cmp() in contrib programs.
-  Corrected the png_get_IHDR() call in contrib/gregbook/readpng2.c
-  Changed pngminim/*/gather.sh to stop trying to remove pnggccrd.c and pngvcrd.c
-  Added dependency on pngpriv.h in contrib/pngminim/*/makefile
-
-Version 1.4.0beta96 [November 12, 2009]
-  Renamed scripts/makefile.wce to scripts/makefile.cegcc
-  Revised Makefile.am to use libpng.sys while building libpng.so
-    so that only PNG_EXPORT functions are exported.
-  Removed the deprecated png_check_sig() function/macro.
-  Removed recently removed function names from scripts/*.def
-  Revised pngtest.png to put chunks in the same order written by pngtest
-    (evidently the same change made in libpng-1.0beta54 was lost).
-  Added PNG_PRIVATE macro definition in pngconf.h for possible future use.
-
-Version 1.4.0beta97 [November 13, 2009]
-  Restored pngtest.png to the libpng-1.4.0beta7 version.
-  Removed projects/beos and netware.txt; no one seems to be supporting them.
-  Revised Makefile.in
-
-Version 1.4.0beta98 [November 13, 2009]
-  Added the "xcode" project to zip distributions,
-  Fixed a typo in scripts/pngwin.def introduced in beta97.
-
-Version 1.4.0beta99 [November 14, 2009]
-  Moved libpng-config.in and libpng.pc-configure.in out of the scripts
-    directory, to libpng-config.in and libpng-pc.in, respectively, and
-    modified Makefile.am and configure.ac accordingly.  Now "configure"
-    needs nothing from the "scripts" directory.
-  Avoid redefining PNG_CONST in pngconf.h
-
-Version 1.4.0beta100 [November 14, 2009]
-  Removed ASM builds from projects/visualc6 and projects/visualc71
-  Removed scripts/makefile.nommx and makefile.vcawin32
-  Revised CMakeLists.txt to account for new location of libpng-config.in
-    and libpng-pc.in
-  Updated INSTALL to reflect removal and relocation of files.
-
-Version 1.4.0beta101 [November 14, 2009]
-  Restored the binary files (*.jpg, *.png, some project files) that were
-    accidentally deleted from the zip and 7z distributions when the xcode
-    project was added.
-
-Version 1.4.0beta102 [November 18, 2009]
-  Added libpng-config.in and libpng-pc.in to the zip and 7z distributions.
-  Fixed a typo in projects/visualc6/pngtest.dsp, introduced in beta100.
-  Moved descriptions of makefiles and other scripts out of INSTALL into
-    scripts/README.txt
-  Updated the copyright year in scripts/pngwin.rc from 2006 to 2009.
-
-Version 1.4.0beta103 [November 21, 2009]
-  Removed obsolete comments about ASM from projects/visualc71/README_zlib.txt
-  Align row_buf on 16-byte boundary in memory.
-  Restored the PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED guard around the call
-    to png_flush() after png_write_IEND().  See 1.4.0beta32, 1.4.0beta50
-    changes above and 1.2.30, 1.2.30rc01 and rc03 in 1.2.41 CHANGES.  Someone
-    needs this feature.
-  Make the 'png_jmpbuf' macro expand to a call that records the correct
-    longjmp function as well as returning a pointer to the setjmp
-    jmp_buf buffer, and marked direct access to jmpbuf 'deprecated'.
-    (John Bowler)
-
-Version 1.4.0beta104 [November 22, 2009]
-  Removed png_longjmp_ptr from scripts/*.def and libpng.3
-  Rebuilt configure scripts with autoconf-2.65
-
-Version 1.4.0beta105 [November 25, 2009]
-  Use fast integer PNG_DIVIDE_BY_255() or PNG_DIVIDE_BY_65535()
-    to accomplish alpha premultiplication when
-    PNG_READ_COMPOSITE_NODIV_SUPPORTED is defined.
-  Changed "/255" to "/255.0" in background calculations to make it clear
-    that the 255 is used as a double.
-
-Version 1.4.0beta106 [November 27, 2009]
-  Removed premultiplied alpha feature.
-
-Version 1.4.0beta107 [December 4, 2009]
-  Updated README
-  Added "#define PNG_NO_PEDANTIC_WARNINGS" in the libpng source files.
-  Removed "-DPNG_CONFIGURE_LIBPNG" from the makefiles and projects.
-  Revised scripts/makefile.netbsd, makefile.openbsd, and makefile.sco
-    to put png.h and pngconf.h in $prefix/include, like the other scripts,
-    instead of in $prefix/include/libpng.  Also revised makefile.sco
-    to put them in $prefix/include/libpng15 instead of in
-    $prefix/include/libpng/libpng15.
-
-Version 1.4.0beta108 [December 11, 2009]
-  Removed leftover "-DPNG_CONFIGURE_LIBPNG" from contrib/pngminim/*/makefile
-  Relocated png_do_chop() to its original position in pngrtran.c; the
-    change in version 1.2.41beta08 caused transparency to be handled wrong
-    in some 16-bit datastreams (Yusaku Sugai).
-
-Version 1.4.0beta109 [December 13, 2009]
-  Added "bit_depth" parameter to the private png_build_gamma_table() function.
-  Pass bit_depth=8 to png_build_gamma_table() when bit_depth is 16 but the
-    PNG_16_TO_8 transform has been set, to avoid unnecessary build of 16-bit
-    tables.
-
-Version 1.4.0rc02 [December 20, 2009]
-  Declared png_cleanup_needed "volatile" in pngread.c and pngwrite.c
-
-Version 1.4.0rc03 [December 22, 2009]
-  Renamed libpng-pc.in back to libpng.pc.in and revised CMakeLists.txt
-    (revising the change in 1.4.0beta99)
-
-Version 1.4.0rc04 [December 25, 2009]
-  Swapped PNG_UNKNOWN_CHUNKS_SUPPORTED and PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-    in pngset.c to be consistent with other changes in version 1.2.38.
-
-Version 1.4.0rc05 [December 25, 2009]
-  Changed "libpng-pc.in" to "libpng.pc.in" in configure.ac, configure, and
-    Makefile.in to be consistent with changes in libpng-1.4.0rc03
-
-Version 1.4.0rc06 [December 29, 2009]
-  Reverted the gamma_table changes from libpng-1.4.0beta109.
-  Fixed some indentation errors.
-
-Version 1.4.0rc07 [January 1, 2010]
-  Revised libpng*.txt and libpng.3 about 1.2.x->1.4.x differences.
-  Use png_calloc() instead of png_malloc(); png_memset() in pngrutil.c
-  Update copyright year to 2010.
-
-Version 1.4.0rc08 [January 2, 2010]
-  Avoid deprecated references to png_ptr-io_ptr and png_ptr->error_ptr
-    in pngtest.c
-
-Version 1.4.0 [January 3, 2010]
-  No changes.
-
-Version 1.4.1beta01 [January 8, 2010]
-  Updated CMakeLists.txt for consistent indentation and to avoid an
-    unclosed if-statement warning (Philip Lowman).
-  Revised Makefile.am and Makefile.in to remove references to Y2KINFO,
-    KNOWNBUG, and libpng.la (Robert Schwebel).
-  Revised the makefiles to install the same files and symbolic
-    links as configure, except for libpng.la and libpng14.la.
-  Make png_set|get_compression_buffer_size() available even when
-    PNG_WRITE_SUPPORTED is not enabled.
-  Revised Makefile.am and Makefile.in to simplify their maintenance.
-  Revised scripts/makefile.linux to install a link to libpng14.so.14.1
-
-Version 1.4.1beta02 [January 9, 2010]
-  Revised the rest of the makefiles to install a link to libpng14.so.14.1
-
-Version 1.4.1beta03 [January 10, 2010]
-  Removed png_set_premultiply_alpha() from scripts/*.def
-
-Version 1.4.1rc01 [January 16, 2010]
-  No changes.
-
-Version 1.4.1beta04 [January 23, 2010]
-  Revised png_decompress_chunk() to improve speed and memory usage when
-    decoding large chunks.
-  Added png_set|get_chunk_malloc_max() functions.
-
-Version 1.4.1beta05 [January 26, 2010]
-  Relocated "int k" declaration in pngtest.c to minimize its scope.
-
-Version 1.4.1beta06 [January 28, 2010]
-  Revised png_decompress_chunk() to use a two-pass method suggested by
-    John Bowler.
-
-Version 1.4.1beta07 [February 6, 2010]
-  Folded some long lines in the source files.
-  Added definable PNG_USER_CHUNK_CACHE_MAX, PNG_USER_CHUNK_MALLOC_MAX,
-    and a PNG_USER_LIMITS_SUPPORTED flag.
-  Eliminated use of png_ptr->irowbytes and reused the slot in png_ptr as
-    png_ptr->png_user_chunk_malloc_max.
-  Revised png_push_save_buffer() to do fewer but larger png_malloc() calls.
-
-Version 1.4.1beta08 [February 6, 2010]
-  Minor cleanup and updating of dates and copyright year.
-
-Version 1.5.0beta01 [February 7, 2010]
-  Moved declaration of png_struct into private pngstruct.h and png_info
-    into pnginfo.h
-
-Version 1.4.1beta09 and 1.5.0beta02 [February 7, 2010]
-  Reverted to original png_push_save_buffer() code.
-
-Version 1.4.1beta10 and 1.5.0beta03 [February 8, 2010]
-  Return allocated "old_buffer" in png_push_save_buffer() before
-    calling png_error(), to avoid a potential memory leak.
-  Updated configure script to use SO number 15.
-
-Version 1.5.0beta04 [February 9, 2010]
-  Removed malformed "incomplete struct declaration" of png_info from png.h
-
-Version 1.5.0beta05 [February 12, 2010]
-  Removed PNG_DEPSTRUCT markup in pngstruct.h and pnginfo.h, and undid the
-    linewrapping that it entailed.
-  Revised comments in pngstruct.h and pnginfo.h and added pointers to
-    the libpng license.
-  Changed PNG_INTERNAL to PNG_EXPOSE_INTERNAL_STRUCTURES
-  Removed the cbuilder5 project, which has not been updated to 1.4.0.
-
-Version 1.4.1beta12 and 1.5.0beta06 [February 14, 2010]
-  Fixed type declaration of png_get_chunk_malloc_max() in pngget.c (Daisuke
-    Nishikawa)
-
-Version 1.5.0beta07 [omitted]
-
-Version 1.5.0beta08 [February 19, 2010]
-  Changed #ifdef PNG_NO_STDIO_SUPPORTED to #ifdef PNG_NO_CONSOLE_IO_SUPPORTED
-    wherever png_snprintf() is used to construct error and warning messages.
-  Noted in scripts/makefile.mingw that it expects to be run under MSYS.
-  Removed obsolete unused MMX-querying support from contrib/gregbook
-  Added exported png_longjmp() function.
-  Removed the AIX redefinition of jmpbuf in png.h
-  Added -D_ALLSOURCE in configure.ac, makefile.aix, and CMakeLists.txt
-    when building on AIX.
-
-Version 1.5.0beta09 [February 19, 2010]
-  Removed -D_ALLSOURCE from configure.ac, makefile.aix, and CMakeLists.txt.
-  Changed the name of png_ptr->jmpbuf to png_ptr->png_jmpbuf in pngstruct.h
-
-Version 1.5.0beta10 [February 25, 2010]
-  Removed unused gzio.c from contrib/pngminim gather and makefile scripts
-  Removed replacement error handlers from contrib/gregbook.  Because of
-    the new png_longjmp() function they are no longer needed.
-
-Version 1.5.0beta11 [March 6, 2010]
-  Removed checking for already-included setjmp.h from pngconf.h
-  Fixed inconsistent indentations and made numerous cosmetic changes.
-  Revised the "SEE ALSO" style of libpng.3, libpngpf.3, and png.5
-
-Version 1.5.0beta12 [March 9, 2010]
-  Moved "#include png.h" inside pngpriv.h and removed "#include png.h" from
-    the source files, along with "#define PNG_EXPOSE_INTERNAL_STRUCTURES"
-    and "#define PNG_NO_PEDANTIC_WARNINGS" (John Bowler).
-  Created new pngdebug.h and moved debug definitions there.
-
-Version 1.5.0beta13 [March 10, 2010]
-  Protect pngstruct.h, pnginfo.h, and pngdebug.h from being included twice.
-  Revise the "#ifdef" blocks in png_inflate() so it will compile when neither
-    PNG_USER_CHUNK_MALLOC_MAX nor PNG_SET_CHUNK_MALLOC_LIMIT_SUPPORTED
-    is defined.
-  Removed unused png_measure_compressed_chunk() from pngpriv.h and libpngpf.3
-  Moved the 'config.h' support from pngconf.h to pngpriv.h
-  Removed PNGAPI from the png_longjmp_ptr typedef.
-  Eliminated dependence of pngtest.c on the private pngdebug.h file.
-  Make all png_debug macros into *unterminated* statements or
-    expressions (i.e. a trailing ';' must always be added) and correct
-    the format statements in various png_debug messages.
-
-Version 1.5.0beta14 [March 14, 2010]
-  Removed direct access to png_ptr->io_ptr from the Windows code in pngtest.c
-  Revised Makefile.am to account for recent additions and replacements.
-  Corrected CE and OS/2 DEF files (scripts/png*def) for symbols removed and
-    added ordinal numbers to the Windows DEF file and corrected the duplicated
-    ordinal numbers on CE symbols that are commented out.
-  Added back in export symbols that can be present in the Windows build but
-    are disabled by default.
-  PNG_EXPORT changed to include an 'ordinal' field for DEF file generation.
-    PNG_CALLBACK added to make callback definitions uniform.  PNGAPI split
-    into PNGCAPI (base C form), PNGAPI (exports) and PNGCBAPI (callbacks),
-    and appropriate changes made to all files.  Cygwin builds re-hinged to
-    allow procedure call standard changes and to remove the need for the DEF
-    file (fixes build on Cygwin).
-  Enabled 'attribute' warnings that are relevant to library APIs and callbacks.
-  Changed rules for generation of the various symbol files and added a new
-    rule for a DEF file (which is also added to the distribution).
-  Updated the symbol file generation to stop it adding spurious spaces
-    to EOL (coming from preprocessor macro expansion).  Added a facility
-    to join tokens in the output and rewrite *.dfn to use this.
-  Eliminated scripts/*.def in favor of libpng.def; updated projects/visualc71
-    and removed scripts/makefile.cygwin.
-  Made PNG_BUILD_DLL safe: it can be set whenever a DLL is being built.
-  Removed the include of sys/types.h - apparently unnecessary now on the
-    platforms on which it happened (all but Mac OS and RISC OS).
-  Moved the Mac OS test into pngpriv.h (the only place it is used.)
-
-Version 1.5.0beta15 [March 17, 2010]
-  Added symbols.chk target to Makefile.am to validate the symbols in png.h
-    against the new DEF file scripts/symbols.def.
-  Changed the default DEF file back to pngwin.def.
-  Removed makefile.mingw.
-  Eliminated PNG_NO_EXTERN and PNG_ALL_EXTERN
-
-Version 1.5.0beta16 [April 1, 2010]
-  Make png_text_struct independent of PNG_iTXt_SUPPORTED, so that
-    fields are initialized in all configurations.  The READ/WRITE
-    macros (PNG_(READ|WRITE)_iTXt_SUPPORTED) still function as
-    before to disable code to actually read or write iTXt chunks
-    and iTXt_SUPPORTED can be used to detect presence of either
-    read or write support (but it is probably better to check for
-    the one actually required - read or write.)
-  Combined multiple png_warning() calls for a single error.
-  Restored the macro definition of png_check_sig().
-
-Version 1.5.0beta17 [April 17, 2010]
-  Added some "(long)" typecasts to printf calls in png_handle_cHRM().
-  Documented the fact that png_set_dither() was disabled since libpng-1.4.0.
-  Reenabled png_set_dither() but renamed it to png_set_quantize() to reflect
-    more accurately what it actually does.  At the same time, renamed
-    the PNG_DITHER_[RED,GREEN_BLUE]_BITS macros to
-    PNG_QUANTIZE_[RED,GREEN,BLUE]_BITS.
-  Added some "(long)" typecasts to printf calls in png_handle_cHRM().
-  Freeze build-time only configuration in the build.
-    In all prior versions of libpng most configuration options
-    controlled by compiler #defines had to be repeated by the
-    application code that used libpng.  This patch changes this
-    so that compilation options that can only be changed at build
-    time are frozen in the build.  Options that are compiler
-    dependent (and those that are system dependent) are evaluated
-    each time - pngconf.h holds these.  Options that can be changed
-    per-file in the application are in png.h.  Frozen options are
-    in the new installed header file pnglibconf.h (John Bowler)
-  Removed the xcode project because it has not been updated to work
-    with libpng-1.5.0.
-  Removed the ability to include optional pngusr.h
-
-Version 1.5.0beta18 [April 17, 2010]
-  Restored the ability to include optional pngusr.h
-  Moved replacements for png_error() and png_warning() from the
-    contrib/pngminim project to pngerror.c, for use when warnings or
-    errors are disabled via PNG_NO_WARN or PNG_NO_ERROR_TEXT, to avoid
-    storing unneeded error/warning text.
-  Updated contrib/pngminim project to work with the new pnglibconf.h
-  Added some PNG_NO_* defines to contrib/pngminim/*/pngusr.h to save space.
-
-Version 1.5.0beta19 [April 24, 2010]
-  Added PNG_{READ,WRITE}_INT_FUNCTIONS_SUPPORTED.  This allows the functions
-    to read and write ints to be disabled independently of PNG_USE_READ_MACROS,
-    which allows libpng to be built with the functions even though the default
-    is to use the macros - this allows applications to choose at app build
-    time whether or not to use macros (previously impossible because the
-    functions weren't in the default build.)
-  Changed Windows calling convention back to __cdecl for API functions.
-    For Windows/x86 platforms only:
-      __stdcall is no longer needed for Visual Basic, so libpng-1.5.0 uses
-      __cdecl throughout (both API functions and callbacks) on Windows/x86
-      platforms.
-  Replaced visualc6 and visualc71 projects with new vstudio project
-  Relaxed the overly-restrictive permissions of some files.
-
-Version 1.5.0beta20 [April 24, 2010]
-  Relaxed more overly-restrictive permissions of some files.
-
-Version 1.5.0beta21 [April 27, 2010]
-  Removed some unwanted binary bytes and changed CRLF to NEWLINE in the new
-    vstudio project files, and some trivial editing of some files in the
-    scripts directory.
-  Set PNG_NO_READ_BGR, PNG_NO_IO_STATE, and PNG_NO_TIME_RFC1123 in
-    contrib/pngminim/decoder/pngusr.h to make a smaller decoder application.
-
-Version 1.5.0beta22 [April 28, 2010]
-  Fixed dependencies of GET_INT_32 - it does not require READ_INT_FUNCTIONS
-    because it has a macro equivalent.
-  Improved the options.awk script; added an "everything off" option.
-  Revised contrib/pngminim to use the "everything off" option in pngusr.dfa.
-
-Version 1.5.0beta23 [April 29, 2010]
-  Corrected PNG_REMOVED macro to take five arguments.
-    The macro was documented with two arguments (name,ordinal), however
-    the symbol checking .dfn files assumed five arguments.  The five
-    argument form seems more useful so it is changed to that.
-  Corrected PNG_UNKNOWN_CHUNKS_SUPPORTED to PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-    in gregbook/readpng2.c
-  Corrected protection of png_get_user_transform_ptr. The API declaration in
-    png.h is removed if both READ and WRITE USER_TRANSFORM are turned off
-    but was left defined in pngtrans.c
-  Added logunsupported=1 to cause pnglibconf.h to document disabled options.
-    This makes the installed pnglibconf.h more readable but causes no
-    other change.  The intention is that users of libpng will find it
-    easier to understand if an API they need is missing.
-  Include png_reset_zstream() in png.c only when PNG_READ_SUPPORTED is defined.
-  Removed dummy_inflate.c from contrib/pngminim/encoder
-  Removed contrib/pngminim/*/gather.sh; gathering is now done in the makefile.
-
-Version 1.5.0beta24 [May 7, 2010]
-  Use bitwise "&" instead of arithmetic mod in pngrutil.c calculation of the
-    offset of the png_ptr->rowbuf pointer into png_ptr->big_row_buf.
-  Added more blank lines for readability.
-
-Version 1.5.0beta25 [June 18, 2010]
-  In pngpread.c: png_push_have_row() add check for new_row > height
-  Removed the now-redundant check for out-of-bounds new_row from example.c
-
-Version 1.5.0beta26 [June 18, 2010]
-  In pngpread.c: png_push_process_row() add check for too many rows.
-
-Version 1.5.0beta27 [June 18, 2010]
-  Removed the check added in beta25 as it is now redundant.
-
-Version 1.5.0beta28 [June 20, 2010]
-  Rewrote png_process_IDAT_data to consistently treat extra data as warnings
-    and handle end conditions more cleanly.
-  Removed the new (beta26) check in png_push_process_row().
-
-Version 1.5.0beta29 [June 21, 2010]
-  Revised scripts/options.awk to work on Sunos (but still doesn't work)
-  Added comment to options.awk and contrib/pngminim/*/makefile to try nawk.
-
-Version 1.5.0beta30 [June 22, 2010]
-  Stop memory leak when reading a malformed sCAL chunk.
-
-Version 1.5.0beta31 [June 26, 2010]
-  Revised pngpread.c patch of beta28 to avoid an endless loop.
-  Removed some trailing blanks.
-
-Version 1.5.0beta32 [June 26, 2010]
-  Removed leftover scripts/options.patch and scripts/options.rej
-
-Version 1.5.0beta33 [July 6, 3010]
-  Made FIXED and FLOATING options consistent in the APIs they enable and
-    disable.  Corrected scripts/options.awk to handle both command line
-    options and options specified in the .dfa files.
-  Changed char *msg to PNG_CONST char *msg in pngrutil.c
-  Make png_set_sRGB_gAMA_and_cHRM set values using either the fixed or
-    floating point APIs, but not both.
-  Reversed patch to remove error handler when the jmp_buf is stored in the
-    main program structure, not the png_struct.
-    The error handler is needed because the default handler in libpng will
-    always use the jmp_buf in the library control structure; this is never
-    set.  The gregbook code is a useful example because, even though it
-    uses setjmp/longjmp, it shows how error handling can be implemented
-    using control mechanisms not directly supported by libpng.  The
-    technique will work correctly with mechanisms such as Microsoft
-    Structure Exceptions or C++ exceptions (compiler willing - note that gcc
-    does not by default support interworking of C and C++ error handling.)
-  Reverted changes to call png_longjmp in contrib/gregbook where it is not
-    appropriate.  If mainprog->jmpbuf is used by setjmp, then png_longjmp
-    cannot be used.
-  Changed "extern PNG_EXPORT" to "PNG_EXPORT" in png.h (Jan Nijtmans)
-  Changed "extern" to "PNG_EXTERN" in pngpriv.h (except for the 'extern "C" {')
-
-Version 1.5.0beta34 [July 12, 2010]
-  Put #ifndef PNG_EXTERN, #endif around the define PNG_EXTERN in pngpriv.h
-
-Version 1.5.0beta35 [July 24, 2010]
-  Removed some newly-added TAB characters.
-  Added -DNO_PNG_SNPRINTF to CFLAGS in scripts/makefile.dj2
-  Moved the definition of png_snprintf() outside of the enclosing
-    #ifdef blocks in pngconf.h
-
-Version 1.5.0beta36 [July 29, 2010]
-  Patches by John Bowler:
-  Fixed point APIs are now supported throughout (no missing APIs).
-  Internal fixed point arithmetic support exists for all internal floating
-    point operations.
-  sCAL validates the floating point strings it is passed.
-  Safe, albeit rudimentary, Watcom support is provided by PNG_API_RULE==2
-  Two new APIs exist to get the number of passes without turning on the
-    PNG_INTERLACE transform and to get the number of rows in the current
-    pass.
-  A new test program, pngvalid.c, validates the gamma code.
-  Errors in the 16-bit gamma correction (overflows) have been corrected.
-  cHRM chunk testing is done consistently (previously the floating point
-    API bypassed it, because the test really didn't work on FP, now the test
-    is performed on the actual values to be stored in the PNG file so it
-    works in the FP case too.)
-  Most floating point APIs now simply call the fixed point APIs after
-    converting the values to the fixed point form used in the PNG file.
-  The standard headers no longer include zlib.h, which is currently only
-    required for pngstruct.h and can therefore be internal.
-  Revised png_get_int_32 to undo the PNG two's complement representation of
-    negative numbers.
-
-Version 1.5.0beta37 [July 30, 2010]
-  Added a typecast in png_get_int_32() in png.h and pngrutil.h to avoid
-    a compiler warning.
-  Replaced oFFs 0,0 with oFFs -10,20 in pngtest.png
-
-Version 1.5.0beta38 [July 31, 2010]
-  Implemented remaining "_fixed" functions.
-  Corrected a number of recently introduced warnings mostly resulting from
-    safe but uncast assignments to shorter integers.  Also added a zlib
-    VStudio release library project because the latest zlib Official Windows
-    build does not include such a thing.
-  Revised png_get_int_16() to be similar to png_get_int_32().
-  Restored projects/visualc71.
-
-Version 1.5.0beta39 [August 2, 2010]
-  VisualC/GCC warning fixes, VisualC build fixes
-  The changes include support for function attributes in VC in addition to
-    those already present in GCC - necessary because without these some
-    warnings are unavoidable.  Fixes include signed/unsigned fixes in
-    pngvalid and checks with gcc -Wall -Wextra -Wunused.
-  VC requires function attributes on function definitions as well as
-    declarations, PNG_FUNCTION has been added to enable this and the
-    relevant function definitions changed.
-
-Version 1.5.0beta40 [August 6, 2010]
-  Correct use of _WINDOWS_ in pngconf.h
-  Removed png_mem_ #defines; they are no longer used.
-  Added the sRGB chunk to pngtest.png
-
-Version 1.5.0beta41 [August 11, 2010]
-  Added the cHRM chunk to pngtest.png
-  Don't try to use version-script with cygwin/mingw.
-  Revised contrib/gregbook to work under cygwin/mingw.
-
-Version 1.5.0beta42 [August 18, 2010]
-  Add .dll.a to the list of extensions to be symlinked by Makefile.am (Yaakov)
-  Made all API functions that have const arguments and constant string
-    literal pointers declare them (John Bowler).
-
-Version 1.5.0beta43 [August 20, 2010]
-  Removed spurious tabs, shorten long lines (no source change)
-    Also added scripts/chkfmt to validate the format of all the files that can
-    reasonably be validated (it is suggested to run "make distclean" before
-    checking, because some machine generated files have long lines.)
-  Reformatted the CHANGES file to be more consistent throughout.
-  Made changes to address various issues identified by GCC, mostly
-    signed/unsigned and shortening problems on assignment but also a few
-    difficult to optimize (for GCC) loops.
-  Fixed non-GCC fixed point builds.  In png.c a declaration was misplaced
-    in an earlier update.  Fixed to declare the auto variables at the head.
-  Use cexcept.h in pngvalid.c.
-
-Version 1.5.0beta44 [August 24, 2010]
-  Updated CMakeLists.txt to use CMAKE_INSTALL_LIBDIR variable; useful for
-    installing libpng in /usr/lib64 (Funda Wang).
-  Revised CMakeLists.txt to put the man pages in share/man/man* not man/man*
-  Revised CMakeLists.txt to make symlinks instead of copies when installing.
-  Changed PNG_LIB_NAME from pngNN to libpngNN in CMakeLists.txt (Philip Lowman)
-  Implemented memory checks within pngvalid
-  Reformatted/rearranged pngvalid.c to assist use of progressive reader.
-  Check interlaced images in pngvalid
-  Clarified pngusr.h comments in pnglibconf.dfa
-  Simplified the pngvalid error-handling code now that cexcept.h is in place.
-  Implemented progressive reader in pngvalid.c for standard tests
-  Implemented progressive read in pngvalid.c gamma tests
-  Turn on progressive reader in pngvalid.c by default and tidy code.
-
-Version 1.5.0beta45 [August 26, 2010]
-  Added an explicit make step to projects/vstudio for pnglibconf.h
-    Also corrected zlib.vcxproj into which Visual Studio had introduced
-    what it calls an "authoring error".  The change to make pnglibconf.h
-    simply copies the file; in the future it may actually generate the
-    file from scripts/pnglibconf.dfa as the other build systems do.
-  Changed pngvalid to work when floating point APIs are disabled
-  Renamed the prebuilt scripts/pnglibconf.h to scripts/pnglibconf.h.prebuilt
-  Supply default values for PNG_USER_PRIVATEBUILD and PNG_USER_DLLFNAME_POSTFIX
-    in pngpriv.h in case the user neglected to define them in their pngusr.h
-
-Version 1.5.0beta46 [August 28, 2010]
-  Added new private header files to libpng_sources in CMakeLists.txt
-  Added PNG_READ_16BIT, PNG_WRITE_16BIT, and PNG_16BIT options.
-  Added reference to scripts/pnglibconf.h.prebuilt in the visualc71 project.
-
-Version 1.5.0beta47 [September 11, 2010]
-  Fixed a number of problems with 64-bit compilation reported by Visual
-    Studio 2010 (John Bowler).
-
-Version 1.5.0beta48 [October 4, 2010]
-  Updated CMakeLists.txt (Philip Lowman).
-  Revised autogen.sh to recognize and use $AUTOCONF, $AUTOMAKE, $AUTOHEADER,
-    $AUTOPOINT, $ACLOCAL and $LIBTOOLIZE
-  Fixed problem with symbols creation in Makefile.am which was assuming that
-    all versions of ccp write to standard output by default (Martin Banky). The
-    bug was introduced in libpng-1.2.9beta5.
-  Removed unused mkinstalldirs.
-
-Version 1.5.0beta49 [October 8, 2010]
-  Undid Makefile.am revision of 1.5.0beta48.
-
-Version 1.5.0beta50 [October 14, 2010]
-  Revised Makefile.in to account for mkinstalldirs being removed.
-  Added some "(unsigned long)" typecasts in printf statements in pngvalid.c.
-  Suppressed a compiler warning in png_handle_sPLT().
-  Check for out-of-range text compression mode in png_set_text().
-
-Version 1.5.0beta51 [October 15, 2010]
-  Changed embedded dates to "(PENDING RELEASE) in beta releases (and future
-    rc releases) to minimize the difference between releases.
-
-Version 1.5.0beta52 [October 16, 2010]
-  Restored some of the embedded dates (in png.h, png.c, documentation, etc.)
-
-Version 1.5.0beta53 [October 18, 2010]
-  Updated INSTALL to mention using "make maintainer-clean" and to remove
-    obsolete statement about a custom ltmain.sh
-  Disabled "color-tests" by default in Makefile.am so it will work with
-    automake versions earlier than 1.11.1
-  Use document name "libpng-manual.txt" instead of "libpng-<version>.txt"
-    to simplify version differences.
-  Removed obsolete remarks about setjmp handling from INSTALL.
-  Revised and renamed the typedef in png.h and png.c that was designed
-    to catch library and header mismatch.
-
-Version 1.5.0beta54 [November 10, 2010]
-  Require 48 bytes, not 64 bytes, for big_row_buf in overflow checks.
-  Used a consistent structure for the pngget.c functions.
-
-Version 1.5.0beta55 [November 21, 2010]
-  Revised png_get_uint_32, png_get_int_32, png_get_uint_16 (Cosmin)
-  Moved reading of file signature into png_read_sig (Cosmin)
-  Fixed atomicity of chunk header serialization (Cosmin)
-  Added test for io_state in pngtest.c (Cosmin)
-  Added "#!/bin/sh" at the top of contrib/pngminim/*/gather.sh scripts.
-  Changes to remove gcc warnings (John Bowler)
-    Certain optional gcc warning flags resulted in warnings in libpng code.
-    With these changes only -Wconversion and -Wcast-qual cannot be turned on.
-    Changes are trivial rearrangements of code.  -Wconversion is not possible
-    for pngrutil.c (because of the widespread use of += et al on variables
-    smaller than (int) or (unsigned int)) and -Wcast-qual is not possible
-    with pngwio.c and pngwutil.c because the 'write' callback and zlib
-    compression both fail to declare their input buffers with 'const'.
-
-Version 1.5.0beta56 [December 7, 2010]
-  Added the private PNG_UNUSED() macro definition in pngpriv.h.
-  Added some commentary about PNG_EXPORT in png.h and pngconf.h
-  Revised PNG_EXPORT() macro and added PNG_EXPORTA() macro, with the
-    objective of simplifying and improving the cosmetic appearance of png.h.
-  Fixed some incorrect "=" macro names in pnglibconf.dfa
-  Included documentation of changes in 1.5.0 from 1.4.x in libpng-manual.txt
-
-Version 1.5.0beta57 [December 9, 2010]
-  Documented the pngvalid gamma error summary with additional comments and
-    print statements.
-  Improved missing symbol handling in checksym.awk; symbols missing in both
-    the old and new files can now be optionally ignored, treated as errors
-    or warnings.
-  Removed references to pngvcrd.c and pnggccrd.c from the vstudio project.
-  Updated "libpng14" to "libpng15" in the visualc71 project.
-  Enabled the strip16 tests in pngvalid.`
-  Don't display test results (except PASS/FAIL) when running "make test".
-    Instead put them in pngtest-log.txt
-  Added "--with-zprefix=<string>" to configure.ac
-  Updated the prebuilt configuration files to autoconf version 2.68
-
-Version 1.5.0beta58 [December 19, 2010]
-  Fixed interlace image handling and add test cases (John Bowler)
-  Fixed the clean rule in Makefile.am to remove pngtest-log.txt
-  Made minor changes to work around warnings in gcc 3.4
-
-Version 1.5.0rc01 [December 27, 2010]
-  No changes.
-
-Version 1.5.0rc02 [December 27, 2010]
-  Eliminated references to the scripts/*.def files in project/visualc71.
-
-Version 1.5.0rc03 [December 28, 2010]
-  Eliminated scripts/*.def and revised Makefile.am accordingly
-
-Version 1.5.0rc04 [December 29, 2010]
-  Fixed bug in background transformation handling in pngrtran.c (it was
-    looking for the flag in png_ptr->transformations instead of in
-    png_ptr->flags) (David Raymond).
-
-Version 1.5.0rc05 [December 31, 2010]
-  Fixed typo in a comment in CMakeLists.txt (libpng14 => libpng15) (Cosmin)
-
-Version 1.5.0rc06 [January 4, 2011]
-  Changed the new configure option "zprefix=string" to "zlib-prefix=string"
-
-Version 1.5.0rc07 [January 4, 2011]
-  Updated copyright year.
-
-Version 1.5.0 [January 6, 2011]
-  No changes.
-
-version 1.5.1beta01 [January 8, 2011]
-  Added description of png_set_crc_action() to the manual.
-  Added a note in the manual that the type of the iCCP profile was changed
-    from png_charpp to png_bytepp in png_get_iCCP().  This change happened
-    in version 1.5.0beta36 but is not noted in the CHANGES.  Similarly,
-    it was changed from png_charpp to png_const_bytepp in png_set_iCCP().
-  Ensure that png_rgb_to_gray ignores palette mapped images, if libpng
-    internally happens to call it with one, and fixed a failure to handle
-    palette mapped images correctly.  This fixes CVE-2690.
-
-Version 1.5.1beta02 [January 14, 2011]
-  Fixed a bug in handling of interlaced images (bero at arklinux.org).
-  Updated CMakeLists.txt (Clifford Yapp)
-
-Version 1.5.1beta03 [January 14, 2011]
-  Fixed typecasting of some png_debug() statements (Cosmin)
-
-Version 1.5.1beta04 [January 16, 2011]
-  Updated documentation of png_set|get_tRNS() (Thomas Klausner).
-  Mentioned in the documentation that applications must #include "zlib.h"
-    if they need access to anything in zlib.h, and that a number of
-    macros such as png_memset() are no longer accessible by applications.
-  Corrected pngvalid gamma test "sample" function to access all of the color
-    samples of each pixel, instead of sampling the red channel three times.
-  Prefixed variable names index, div, exp, gamma with "png_" to avoid "shadow"
-    warnings, and (mistakenly) changed png_exp() to exp().
-
-Version 1.5.1beta05 [January 16, 2011]
-  Changed variable names png_index, png_div, png_exp, and png_gamma to
-    char_index, divisor, exp_b10, and gamma_val, respectively, and
-    changed exp() back to png_exp().
-
-Version 1.5.1beta06 [January 20, 2011]
-  Prevent png_push_crc_skip() from hanging while reading an unknown chunk
-    or an over-large compressed zTXt chunk with the progressive reader.
-  Eliminated more GCC "shadow" warnings.
-  Revised png_fixed() in png.c to avoid compiler warning about reaching the
-    end without returning anything.
-
-Version 1.5.1beta07 [January 22, 2011]
-  In the manual, describe the png_get_IHDR() arguments in the correct order.
-  Added const_png_structp and const_png_infop types, and used them in
-    prototypes for most png_get_*() functions.
-
-Version 1.5.1beta08 [January 23, 2011]
-  Added png_get_io_chunk_type() and deprecated png_get_io_chunk_name()
-  Added synopses for the IO_STATE functions and other missing synopses
-    to the manual. Removed the synopses from libpngpf.3 because they
-    were out of date and no longer useful.  Better information can be
-    obtained by reading the prototypes and comments in pngpriv.h
-  Attempted to fix cpp on Solaris with S. Studio 12 cc, fix build
-    Added a make macro DFNCPP that is a CPP that will accept the tokens in
-    a .dfn file and adds configure stuff to test for such a CPP.  ./configure
-    should fail if one is not available.
-  Corrected const_png_ in png.h to png_const_ to avoid polluting the namespace.
-  Added png_get_current_row_number and png_get_current_pass_number for the
-    benefit of the user transform callback.
-  Added png_process_data_pause and png_process_data_skip for the benefit of
-    progressive readers that need to stop data processing or want to optimize
-    skipping of unread data (e.g., if the reader marks a chunk to be skipped.)
-
-Version 1.5.1beta09 [January 24, 2011]
-  Enhanced pngvalid, corrected an error in gray_to_rgb, corrected doc error.
-    pngvalid contains tests of transforms, which tests are currently disabled
-    because they are incompletely tested.  gray_to_rgb was failing to expand
-    the bit depth for smaller bit depth images; this seems to be a long
-    standing error and resulted, apparently, in invalid output
-    (CVE-2011-0408, CERT VU#643140).  The documentation did not accurately
-    describe what libpng really does when converting RGB to gray.
-
-Version 1.5.1beta10 [January 27, 2010]
-  Fixed incorrect examples of callback prototypes in the manual, that were
-    introduced in libpng-1.0.0.
-  In addition the order of the png_get_uint macros with respect to the
-    relevant function definitions has been reversed.  This helps the
-    preprocessing of the symbol files be more robust.  Furthermore, the
-    symbol file preprocessing now uses -DPNG_NO_USE_READ_MACROS even when
-    the library may actually be built with PNG_USE_READ_MACROS; this stops
-    the read macros interfering with the symbol file format.
-  Made the manual, synopses, and function prototypes use the function
-    argument names file_gamma, int_file_gamma, and srgb_intent consistently.
-
-Version 1.5.1beta11 [January 28, 2011]
-  Changed PNG_UNUSED from "param=param;" to "{if(param){}}".
-  Corrected local variable type in new API png_process_data_skip()
-    The type was self-evidently incorrect but only causes problems on 64-bit
-    architectures.
-  Added transform tests to pngvalid and simplified the arguments.
-
-Version 1.5.1rc01 [January 29, 2011]
-  No changes.
-
-Version 1.5.1rc02 [January 31, 2011]
-  Added a request in the manual that applications do not use "png_" or
-    "PNG_" to begin any of their own symbols.
-  Changed PNG_UNUSED to "(void)param;" and updated the commentary in pngpriv.h
-
-Version 1.5.1 [February 3, 2011]
-  No changes.
-
-Version 1.5.2beta01 [February 13, 2011]
-  More -Wshadow fixes for older gcc compilers.  Older gcc versions apparently
-    check formal parameters names in function declarations (as well as
-    definitions) to see if they match a name in the global namespace.
-  Revised PNG_EXPORTA macro to not use an empty parameter, to accommodate the
-    old VisualC++ preprocessor.
-  Turned on interlace handling in png_read_png().
-  Fixed gcc pedantic warnings.
-  Handle longjmp in Cygwin.
-  Fixed png_get_current_row_number() in the interlaced case.
-  Cleaned up ALPHA flags and transformations.
-  Implemented expansion to 16 bits.
-
-Version 1.5.2beta02 [February 19, 2011]
-  Fixed mistake in the descriptions of user read_transform and write_transform
-    function prototypes in the manual.  The row_info struct is png_row_infop.
-  Reverted png_get_current_row_number() to previous (1.5.2beta01) behavior.
-  Corrected png_get_current_row_number documentation
-  Fixed the read/write row callback documentation.
-    This documents the current behavior, where the callback is called after
-    every row with information pertaining to the next row.
-
-Version 1.5.2beta03 [March 3, 2011]
-  Fixed scripts/makefile.vcwin32
-  Updated contrib/pngsuite/README to add the word "modify".
-  Define PNG_ALLOCATED to blank when _MSC_VER<1300.
-
-Version 1.5.2rc01 [March 19, 2011]
-  Define remaining attributes to blank when MSC_VER<1300.
-  ifdef out mask arrays in pngread.c when interlacing is not supported.
-
-Version 1.5.2rc02 [March 22, 2011]
-  Added a hint to try CPP=/bin/cpp if "cpp -E" fails in scripts/pnglibconf.mak
-    and in contrib/pngminim/*/makefile, eg., on SunOS 5.10, and removed "strip"
-    from the makefiles.
-  Fixed a bug (present since libpng-1.0.7) that makes png_handle_sPLT() fail
-    to compile when PNG_NO_POINTER_INDEXING is defined (Chubanov Kirill)
-
-Version 1.5.2rc03 [March 24, 2011]
-  Don't include standard header files in png.h while building the symbol table,
-    to avoid cpp failure on SunOS (introduced PNG_BUILDING_SYMBOL_TABLE macro).
-
-Version 1.5.2 [March 31, 2011]
-  No changes.
-
-Version 1.5.3beta01 [April 1, 2011]
-  Re-initialize the zlib compressor before compressing non-IDAT chunks.
-  Added API functions (png_set_text_compression_level() and four others) to
-    set parameters for zlib compression of non-IDAT chunks.
-
-Version 1.5.3beta02 [April 3, 2011]
-  Updated scripts/symbols.def with new API functions.
-  Only compile the new zlib re-initializing code when text or iCCP is
-    supported, using PNG_WRITE_COMPRESSED_TEXT_SUPPORTED macro.
-  Improved the optimization of the zlib CMF byte (see libpng-1.2.6beta03).
-  Optimize the zlib CMF byte in non-IDAT compressed chunks
-
-Version 1.5.3beta03 [April 16, 2011]
-  Fixed gcc -ansi -pedantic compile. A strict ANSI system does not have
-    snprintf, and the "__STRICT_ANSI__" detects that condition more reliably
-    than __STDC__ (John Bowler).
-  Removed the PNG_PTR_NORETURN attribute because it too dangerous. It tells
-    the compiler that a user supplied callback (the error handler) does not
-    return, yet there is no guarantee in practice that the application code
-    will correctly implement the error handler because the compiler only
-    issues a warning if there is a mistake (John Bowler).
-  Removed the no-longer-used PNG_DEPSTRUCT macro.
-  Updated the zlib version to 1.2.5 in the VStudio project.
-  Fixed 64-bit builds where png_uint_32 is smaller than png_size_t in
-    pngwutil.c (John Bowler).
-  Fixed bug with stripping the filler or alpha channel when writing, that
-    was introduced in libpng-1.5.2beta01 (bug report by Andrew Church).
-
-Version 1.5.3beta04 [April 27, 2011]
-  Updated pngtest.png with the new zlib CMF optimization.
-  Cleaned up conditional compilation code and of background/gamma handling
-    Internal changes only except a new option to avoid compiling the
-    png_build_grayscale_palette API (which is not used at all internally.)
-    The main change is to move the transform tests (READ_TRANSFORMS,
-    WRITE_TRANSFORMS) up one level to the caller of the APIs.  This avoids
-    calls to spurious functions if all transforms are disabled and slightly
-    simplifies those functions.  Pngvalid modified to handle this.
-    A minor change is to stop the strip_16 and expand_16 interfaces from
-    disabling each other; this allows the future alpha premultiplication
-    code to use 16-bit intermediate values while still producing 8-bit output.
-    png_do_background and png_do_gamma have been simplified to take a single
-    pointer to the png_struct rather than pointers to every item required
-    from the png_struct. This makes no practical difference to the internal
-    code.
-  A serious bug in the pngvalid internal routine 'standard_display_init' has
-    been fixed - this failed to initialize the red channel and accidentally
-    initialized the alpha channel twice.
-  Changed png_struct jmp_buf member name from png_jmpbuf to tmp_jmpbuf to
-    avoid a possible clash with the png_jmpbuf macro on some platforms.
-
-Version 1.5.3beta05 [May 6, 2011]
-  Added the "_POSIX_SOURCE" feature test macro to ensure libpng sees the
-    correct API. _POSIX_SOURCE is defined in pngpriv.h, pngtest.c and
-    pngvalid.c to ensure that POSIX conformant systems disable non-POSIX APIs.
-  Removed png_snprintf and added formatted warning messages.  This change adds
-    internal APIs to allow png_warning messages to have parameters without
-    requiring the host OS to implement snprintf.  As a side effect the
-    dependency of the tIME-supporting RFC1132 code on stdio is removed and
-    PNG_NO_WARNINGS does actually work now.
-  Pass "" instead of '\0' to png_default_error() in png_err().  This mistake
-    was introduced in libpng-1.2.20beta01.  This fixes CVE-2011-2691.
-  Added PNG_WRITE_OPTIMIZE_CMF_SUPPORTED macro to make the zlib "CMF" byte
-    optimization configurable.
-  IDAT compression failed if preceded by a compressed text chunk (bug
-    introduced in libpng-1.5.3beta01-02).  This was because the attempt to
-    reset the zlib stream in png_write_IDAT happened after the first IDAT
-    chunk had been deflated - much too late.  In this change internal
-    functions were added to claim/release the z_stream and, hopefully, make
-    the code more robust.  Also deflateEnd checking is added - previously
-    libpng would ignore an error at the end of the stream.
-
-Version 1.5.3beta06 [May 8, 2011]
-  Removed the -D_ALL_SOURCE from definitions for AIX in CMakeLists.txt
-  Implemented premultiplied alpha support: png_set_alpha_mode API
-
-Version 1.5.3beta07 [May 11, 2011]
-  Added expand_16 support to the high level interface.
-  Added named value and 'flag' gamma support to png_set_gamma.  Made a minor
-    change from the previous (unreleased) ABI/API to hide the exact value used
-    for Macs - it's not a good idea to embed this in the ABI!
-  Moved macro definitions for PNG_HAVE_IHDR, PNG_HAVE_PLTE, and PNG_AFTER_IDAT
-    from pngpriv.h to png.h because they must be visible to applications
-    that call png_set_unknown_chunks().
-  Check for up->location !PNG_AFTER_IDAT when writing unknown chunks
-    before IDAT.
-
-Version 1.5.3beta08 [May 16, 2011]
-  Improved "pngvalid --speed" to exclude more of pngvalid from the time.
-  Documented png_set_alpha_mode(), other changes in libpng.3/libpng-manual.txt
-  The cHRM chunk now sets the defaults for png_set_rgb_to_gray() (when negative
-    parameters are supplied by the caller), while in the absence of cHRM
-    sRGB/Rec 709 values are still used.  This introduced a divide-by-zero
-    bug in png_handle_cHRM().
-  The bKGD chunk no longer overwrites the background value set by
-    png_set_background(), allowing the latter to be used before the file
-    header is read. It never performed any useful function to override
-    the default anyway.
-  Added memory overwrite and palette image checks to pngvalid.c
-    Previously palette image code was poorly checked. Since the transformation
-    code has a special palette path in most cases this was a severe weakness.
-  Minor cleanup and some extra checking in pngrutil.c and pngrtran.c. When
-    expanding an indexed image, always expand to RGBA if transparency is
-    present.
-
-Version 1.5.3beta09 [May 17, 2011]
-  Reversed earlier 1.5.3 change of transformation order; move png_expand_16
-    back where it was.  The change doesn't work because it requires 16-bit
-    gamma tables when the code only generates 8-bit ones.  This fails
-    silently; the libpng code just doesn't do any gamma correction.  Moving
-    the tests back leaves the old, inaccurate, 8-bit gamma calculations, but
-    these are clearly better than none!
-
-Version 1.5.3beta10 [May 20, 2011]
-
-  png_set_background() and png_expand_16() did not work together correctly.
-    This problem is present in 1.5.2; if png_set_background is called with
-    need_expand false and the matching 16 bit color libpng erroneously just
-    treats it as an 8-bit color because of where png_do_expand_16 is in the
-    transform list.  This simple fix reduces the supplied colour to 8-bits,
-    so it gets smashed, but this is better than the current behavior.
-  Added tests for expand16, more fixes for palette image tests to pngvalid.
-    Corrects the code for palette image tests and disables attempts to
-    validate palette colors.
-
-Version 1.5.3rc01 [June 3, 2011]
-  No changes.
-
-Version 1.5.3rc02 [June 8, 2011]
-  Fixed uninitialized memory read in png_format_buffer() (Bug report by
-    Frank Busse, CVE-2011-2501, related to CVE-2004-0421).
-
-Version 1.5.3beta11 [June 11, 2011]
-  Fixed png_handle_sCAL which is broken in 1.5. This fixes CVE 2011-2692.
-  Added sCAL to pngtest.png
-  Revised documentation about png_set_user_limits() to say that it also affects
-    png writing.
-  Revised handling of png_set_user_limits() so that it can increase the
-    limit beyond the PNG_USER_WIDTH|HEIGHT_MAX; previously it could only
-    reduce it.
-  Make the 16-to-8 scaling accurate. Dividing by 256 with no rounding is
-    wrong (high by one) 25% of the time. Dividing by 257 with rounding is
-    wrong in 128 out of 65536 cases. Getting the right answer all the time
-    without division is easy.
-  Added "_SUPPORTED" to the PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION macro.
-  Added projects/owatcom, an IDE project for OpenWatcom to replace
-    scripts/makefile.watcom.  This project works with OpenWatcom 1.9. The
-    IDE autogenerates appropriate makefiles (libpng.mk) for batch processing.
-    The project is configurable, unlike the Visual Studio project, so long
-    as the developer has an awk.
-  Changed png_set_gAMA to limit the gamma value range so that the inverse
-    of the stored value cannot overflow the fixed point representation,
-    and changed other things OpenWatcom warns about.
-  Revised pngvalid.c to test PNG_ALPHA_MODE_SUPPORTED correctly. This allows
-    pngvalid to build when ALPHA_MODE is not supported, which is required if
-    it is to build on libpng 1.4.
-  Removed string/memory macros that are no longer used and are not
-    necessarily fully supportable, particularly png_strncpy and png_snprintf.
-  Added log option to pngvalid.c and attempted to improve gamma messages.
-
-Version 1.5.3 [omitted]
-  People found the presence of a beta release following an rc release
-    to be confusing; therefore we bump the version to libpng-1.5.4beta01
-    and there will be no libpng-1.5.3 release.
-
-Version 1.5.4beta01 [June 14, 2011]
-  Made it possible to undefine PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
-    to get the same (inaccurate) output as libpng-1.5.2 and earlier.
-  Moved definitions of PNG_HAVE_IHDR, PNG_AFTER_IDAT, and PNG_HAVE_PLTE
-    outside of an unknown-chunk block in png.h because they are also
-    needed for other uses.
-
-Version 1.5.4beta02 [June 14, 2011]
-  Fixed and clarified LEGACY 16-to-8 scaling code.
-  Added png_set_chop_16() API, to match inaccurate results from previous
-    libpng versions.
-  Removed the ACCURATE and LEGACY options (they are no longer useable)
-  Use the old scaling method for background if png_set_chop_16() was
-    called.
-  Made png_set_chop_16() API removeable by disabling PNG_CHOP_16_TO_8_SUPPORTED
-
-Version 1.5.4beta03 [June 15, 2011]
-  Fixed a problem in png_do_expand_palette() exposed by optimization in
-    1.5.3beta06
-  Also removed a spurious and confusing "trans" member ("trans") from png_info.
-  The palette expand optimization prevented expansion to an intermediate RGBA
-    form if tRNS was present but alpha was marked to be stripped; this exposed
-    a check for tRNS in png_do_expand_palette() which is inconsistent with the
-    code elsewhere in libpng.
-  Correction to the expand_16 code; removed extra instance of
-    png_set_scale_16_to_8 from pngpriv.h
-
-Version 1.5.4beta04 [June 16, 2011]
-  Added a missing "#ifdef PNG_READ_BACKGROUND_SUPPORTED/#endif" in pngrtran.c
-  Added PNG_TRANSFORM_CHOP_16 to the high-level read transforms.
-  Made PNG_READ_16_TO_8_ACCURATE_SCALE configurable again.  If this is
-    not enabled, png_set_strip_16() and png_do_scale_16_to_8() aren't built.
-  Revised contrib/visupng, gregbook, and pngminim to demonstrate chop_16_to_8
-
-Version 1.5.4beta05 [June 16, 2011]
-  Renamed png_set_strip_16() to png_set_scale_16() and renamed
-    png_set_chop_16() to png_set_strip(16) in an attempt to minimize the
-    behavior changes between libpng14 and libpng15.
-
-Version 1.5.4beta06 [June 18, 2011]
-  Fixed new bug that was causing both strip_16 and scale_16 to be applied.
-
-Version 1.5.4beta07 [June 19, 2011]
-  Fixed pngvalid, simplified macros, added checking for 0 in sCAL.
-    The ACCURATE scale macro is no longer defined in 1.5 - call the
-    png_scale_16_to_8 API.  Made sure that PNG_READ_16_TO_8 is still defined
-    if the png_strip_16_to_8 API is present.  png_check_fp_number now
-    maintains some state so that positive, negative and zero values are
-    identified.  sCAL uses these to be strictly spec conformant.
-
-Version 1.5.4beta08 [June 23, 2011]
-  Fixed pngvalid if ACCURATE_SCALE is defined.
-  Updated scripts/pnglibconf.h.prebuilt.
-
-Version 1.5.4rc01 [June 30, 2011]
-  Define PNG_ALLOCATED to "restrict" only if MSC_VER >= 1400.
-
-Version 1.5.4 [July 7, 2011]
-  No changes.
-
-Version 1.5.5beta01 [July 13, 2011]
-  Fixed some typos and made other minor changes in the manual.
-  Updated contrib/pngminus/makefile.std (Samuli Souminen)
-
-Version 1.5.5beta02 [July 14, 2011]
-  Revised Makefile.am and Makefile.in to look in the right directory for
-    pnglibconf.h.prebuilt
-
-Version 1.5.5beta03 [July 27, 2011]
-  Enabled compilation with g++ compiler.  This compiler does not recognize
-    the file extension, so it always compiles with C++ rules.  Made minor
-    changes to pngrutil.c to cast results where C++ expects it but C does not.
-  Minor editing of libpng.3 and libpng-manual.txt.
-
-Version 1.5.5beta04 [July 29, 2011]
-  Revised CMakeLists.txt (Clifford Yapp)
-  Updated commentary about the png_rgb_to_gray() default coefficients
-    in the manual and in pngrtran.c
-
-Version 1.5.5beta05 [August 17, 2011]
-  Prevent unexpected API exports from non-libpng DLLs on Windows.  The "_DLL"
-    is removed from the test of whether a DLL is being built (this erroneously
-    caused the libpng APIs to be marked as DLL exports in static builds under
-    Microsoft Visual Studio).  Almost all of the libpng building configuration
-    is moved from pngconf.h to pngpriv.h, but PNG_DLL_EXPORT remains in
-    pngconf.h, though, so that it is colocated with the import definition (it
-    is no longer used anywhere in the installed headers).  The VStudio project
-    definitions have been cleaned up: "_USRDLL" has been removed from the
-    static library builds (this was incorrect), and PNG_USE_DLL has been added
-    to pngvalid to test the functionality (pngtest does not supply it,
-    deliberately).  The spurious "_EXPORTS" has been removed from the
-    libpng build (all these errors were a result of copy/paste between project
-    configurations.)
-  Added new types and internal functions for CIE RGB end point handling to
-    pngpriv.h (functions yet to be implemented).
-
-Version 1.5.5beta06 [August 26, 2011]
-  Ensure the CMAKE_LIBRARY_OUTPUT_DIRECTORY is set in CMakeLists.txt
-    (Clifford Yap)
-  Fixes to rgb_to_gray and cHRM XYZ APIs (John Bowler):
-    The rgb_to_gray code had errors when combined with gamma correction.
-    Some pixels were treated as true grey when they weren't and such pixels
-    and true grey ones were not gamma corrected (the original value of the
-    red component was used instead).  APIs to get and set cHRM using color
-    space end points have been added and the rgb_to_gray code that defaults
-    based on cHRM, and the divide-by-zero bug in png_handle_cHRM (CERT
-    VU#477046, CVE-2011-3328, introduced in 1.5.4) have been corrected.
-  A considerable number of tests has been added to pngvalid for the
-    rgb_to_gray transform.
-  Arithmetic errors in rgb_to_gray whereby the calculated gray value was
-    truncated to the bit depth rather than rounded have been fixed except in
-    the 8-bit non-gamma-corrected case (where consistency seems more important
-    than correctness.)  The code still has considerable inaccuracies in the
-    8-bit case because 8-bit linear arithmetic is used.
-
-Version 1.5.5beta07 [September 7, 2011]
-  Added "$(ARCH)" option to makefile.darwin
-  Added SunOS support to configure.ac and Makefile.am
-  Changed png_chunk_benign_error() to png_warning() in png.c, in
-    png_XYZ_from_xy_checked().
-
-Version 1.5.5beta08 [September 10, 2011]
-  Fixed 64-bit compilation errors (gcc). The errors fixed relate
-    to conditions where types that are 32 bits in the GCC 32-bit
-    world (uLong and png_size_t) become 64 bits in the 64-bit
-    world.  This produces potential truncation errors which the
-    compiler correctly flags.
-  Relocated new HAVE_SOLARIS_LD definition in configure.ac
-  Constant changes for 64-bit compatibility (removal of L suffixes). The
-    16-bit cases still use "L" as we don't have a 16-bit test system.
-
-Version 1.5.5rc01 [September 15, 2011]
-  Removed "L" suffixes in pngpriv.h
-
-Version 1.5.5 [September 22, 2011]
-  No changes.
-
-Version 1.5.6beta01 [September 22, 2011]
-  Fixed some 64-bit type conversion warnings in pngrtran.c
-  Moved row_info from png_struct to a local variable.
-  The various interlace mask arrays have been made into arrays of
-    bytes and made PNG_CONST and static (previously some arrays were
-    marked PNG_CONST and some weren't).
-  Additional checks have been added to the transform code to validate the
-    pixel depths after the transforms on both read and write.
-  Removed some redundant code from pngwrite.c, in png_destroy_write_struct().
-  Changed chunk reading/writing code to use png_uint_32 instead of png_byte[4].
-    This removes the need to allocate temporary strings for chunk names on
-    the stack in the read/write code.  Unknown chunk handling still uses the
-    string form because this is exposed in the API.
-
-Version 1.5.6beta02 [September 26, 2011]
-  Added a note in the manual the png_read_update_info() must be called only
-    once with a particular info_ptr.
-  Fixed a typo in the definition of the new PNG_STRING_FROM_CHUNK(s,c) macro.
-
-Version 1.5.6beta03 [September 28, 2011]
-  Revised test-pngtest.sh to report FAIL when pngtest fails.
-  Added "--strict" option to pngtest, to report FAIL when the failure is
-    only because the resulting valid files are different.
-  Revised CMakeLists.txt to work with mingw and removed some material from
-    CMakeLists.txt that is no longer useful in libpng-1.5.
-
-Version 1.5.6beta04 [October 5, 2011]
-  Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl")."
-
-Version 1.5.6beta05 [October 12, 2011]
-  Speed up png_combine_row() for interlaced images. This reduces the generality
-    of the code, allowing it to be optimized for Adam7 interlace.  The masks
-    passed to png_combine_row() are now generated internally, avoiding
-    some code duplication and localizing the interlace handling somewhat.
-  Align png_struct::row_buf - previously it was always unaligned, caused by
-    a bug in the code that attempted to align it; the code needs to subtract
-    one from the pointer to take account of the filter byte prepended to
-    each row.
-  Optimized png_combine_row() when rows are aligned. This gains a small
-    percentage for 16-bit and 32-bit pixels in the typical case where the
-    output row buffers are appropriately aligned. The optimization was not
-    previously possible because the png_struct buffer was always misaligned.
-  Fixed bug in png_write_chunk_header() debug print, introduced in 1.5.6beta01.
-
-Version 1.5.6beta06 [October 17, 2011]
-  Removed two redundant tests for uninitialized row.
-  Fixed a relatively harmless memory overwrite in compressed text writing
-    with a 1 byte zlib buffer.
-  Add ability to call png_read_update_info multiple times to pngvalid.c.
-  Fixes for multiple calls to png_read_update_info. These fixes attend to
-    most of the errors revealed in pngvalid, however doing the gamma work
-    twice results in inaccuracies that can't be easily fixed.  There is now
-    a warning in the code if this is going to happen.
-  Turned on multiple png_read_update_info in pngvalid transform tests.
-  Prevent libpng from overwriting unused bits at the end of the image when
-    it is not byte aligned, while reading. Prior to libpng-1.5.6 libpng would
-    overwrite the partial byte at the end of each row if the row width was not
-    an exact multiple of 8 bits and the image is not interlaced.
-
-Version 1.5.6beta07 [October 21, 2011]
-  Made png_ptr->prev_row an aligned pointer into png_ptr->big_prev_row
-    (Mans Rullgard).
-
-Version 1.5.6rc01 [October 26, 2011]
-  Changed misleading "Missing PLTE before cHRM" warning to "Out of place cHRM"
-
-Version 1.5.6rc02 [October 27, 2011]
-  Added LSR() macro to defend against buggy compilers that evaluate non-taken
-    code branches and complain about out-of-range shifts.
-
-Version 1.5.6rc03 [October 28, 2011]
-  Renamed the LSR() macro to PNG_LSR() and added PNG_LSL() macro.
-  Fixed compiler warnings with Intel and MSYS compilers. The logical shift
-    fix for Microsoft Visual C is required by other compilers, so this
-    enables that fix for all compilers when using compile-time constants.
-    Under MSYS 'byte' is a name declared in a system header file, so we
-    changed the name of a local variable to avoid the warnings that result.
-  Added #define PNG_ALIGN_TYPE PNG_ALIGN_NONE to contrib/pngminim/*/pngusr.h
-
-Version 1.5.6 [November 3, 2011]
-  No changes.
-
-Version 1.5.7beta01 [November 4, 2011]
-  Added support for ARM processor, when decoding all PNG up-filtered rows
-    and any other-filtered rows with 3 or 4 bytes per pixel (Mans Rullgard).
-  Fixed bug in pngvalid on early allocation failure; fixed type cast in
-    pngmem.c; pngvalid would attempt to call png_error() if the allocation
-    of a png_struct or png_info failed. This would probably have led to a
-    crash.  The pngmem.c implementation of png_malloc() included a cast
-    to png_size_t which would fail on large allocations on 16-bit systems.
-  Fix for the preprocessor of the Intel C compiler. The preprocessor
-    splits adjacent @ signs with a space; this changes the concatenation
-    token from @-@-@ to PNG_JOIN; that should work with all compiler
-    preprocessors.
-  Paeth filter speed improvements from work by Siarhei Siamashka. This
-    changes the 'Paeth' reconstruction function to improve the GCC code
-    generation on x86. The changes are only part of the suggested ones;
-    just the changes that definitely improve speed and remain simple.
-    The changes also slightly increase the clarity of the code.
-
-Version 1.5.7beta02 [November 11, 2011]
-  Check compression_type parameter in png_get_iCCP and remove spurious
-    casts. The compression_type parameter is always assigned to, so must
-    be non-NULL. The cast of the profile length potentially truncated the
-    value unnecessarily on a 16-bit int system, so the cast of the (byte)
-    compression type to (int) is specified by ANSI-C anyway.
-  Fixed FP division by zero in pngvalid.c; the 'test_pixel' code left
-    the sBIT fields in the test pixel as 0, which resulted in a floating
-    point division by zero which was irrelevant but causes systems where
-    FP exceptions cause a crash. Added code to pngvalid to turn on FP
-    exceptions if the appropriate glibc support is there to ensure this is
-    tested in the future.
-  Updated scripts/pnglibconf.mak and scripts/makefile.std to handle the
-    new PNG_JOIN macro.
-  Added versioning to pnglibconf.h comments.
-  Simplified read/write API initial version; basic read/write tested on
-    a variety of images, limited documentation (in the header file.)
-  Installed more accurate linear to sRGB conversion tables. The slightly
-    modified tables reduce the number of 16-bit values that
-    convert to an off-by-one 8-bit value.  The "makesRGB.c" code that was used
-    to generate the tables is now in a contrib/sRGBtables sub-directory.
-
-Version 1.5.7beta03 [November 17, 2011]
-  Removed PNG_CONST from the sRGB table declarations in pngpriv.h and png.c
-  Added run-time detection of NEON support.
-  Added contrib/libtests; includes simplified API test and timing test and
-    a color conversion utility for rapid checking of failed 'pngstest' results.
-  Multiple transform bug fixes plus a work-round for double gamma correction.
-    libpng does not support more than one transform that requires linear data
-    at once - if this is tried typically the results is double gamma
-    correction. Since the simplified APIs can need rgb to gray combined with
-    a compose operation it is necessary to do one of these outside the main
-    libpng transform code. This check-in also contains fixes to various bugs
-    in the simplified APIs themselves and to some bugs in compose and rgb to
-    gray (on palette) itself.
-  Fixes for C++ compilation using g++ When libpng source is compiled
-    using g++. The compiler imposes C++ rules on the C source; thus it
-    is desirable to make the source work with either C or C++ rules
-    without throwing away useful error information.  This change adds
-    png_voidcast to allow C semantic (void*) cases or the corresponding
-    C++ static_cast operation, as appropriate.
-  Added --noexecstack to assembler file compilation. GCC does not set
-    this on assembler compilation, even though it does on C compilation.
-    This creates security issues if assembler code is enabled; the
-    work-around is to set it by default in the flags for $(CCAS)
-  Work around compilers that don't support declaration of const data. Some
-    compilers fault 'extern const' data declarations (because the data is
-    not initialized); this turns on const-ness only for compilers where
-    this is known to work.
-
-Version 1.5.7beta04 [November 17, 2011]
-  Since the gcc driver does not recognize the --noexecstack flag, we must
-    use the -Wa prefix to have it passed through to the assembler.
-    Also removed a duplicate setting of this flag.
-  Added files that were omitted from the libpng-1.5.7beta03 zip distribution.
-
-Version 1.5.7beta05 [November 25, 2011]
-  Removed "zTXt" from warning in generic chunk decompression function.
-  Validate time settings passed to png_set_tIME() and png_convert_to_rfc1123()
-    (Frank Busse). Note: This prevented CVE-2015-7981 from affecting
-    libpng-1.5.7 and later.
-  Added MINGW support to CMakeLists.txt
-  Reject invalid compression flag or method when reading the iTXt chunk.
-  Backed out 'simplified' API changes. The API seems too complex and there
-    is a lack of consensus or enthusiasm for the proposals.  The API also
-    reveals significant bugs inside libpng (double gamma correction and the
-    known bug of being unable to retrieve a corrected palette). It seems
-    better to wait until the bugs, at least, are corrected.
-  Moved pngvalid.c into contrib/libtests
-  Rebuilt Makefile.in, configure, etc., with autoconf-2.68
-
-Version 1.5.7rc01 [December 1, 2011]
-  Replaced an "#if" with "#ifdef" in pngrtran.c
-  Revised #if PNG_DO_BC block in png.c (use #ifdef and add #else)
-
-Version 1.5.7rc02 [December 5, 2011]
-  Revised project files and contrib/pngvalid/pngvalid.c to account for
-    the relocation of pngvalid into contrib/libtests.
-  Revised pngconf.h to use " __declspec(restrict)" only when MSC_VER >= 1400,
-    as in libpng-1.5.4.
-  Put CRLF line endings in the owatcom project files.
-
-Version 1.5.7rc03 [December 7, 2011]
-  Updated CMakeLists.txt to account for the relocation of pngvalid.c
-
-Version 1.5.7 [December 15, 2011]
-  Minor fixes to pngvalid.c for gcc 4.6.2 compatibility to remove warnings
-    reported by earlier versions.
-  Fixed minor memset/sizeof errors in pngvalid.c.
-
-Version 1.6.0beta01 [December 15, 2011]
-  Removed machine-generated configure files from the GIT repository (they will
-    continue to appear in the tarball distributions and in the libpng15 and
-    earlier GIT branches).
-  Restored the new 'simplified' API, which was started in libpng-1.5.7beta02
-    but later deleted from libpng-1.5.7beta05.
-  Added example programs for the new 'simplified' API.
-  Added ANSI-C (C90) headers and require them, and take advantage of the
-    change. Also fixed some of the projects/* and contrib/* files that needed
-    updates for libpng16 and the move of pngvalid.c.
-    With this change the required ANSI-C header files are assumed to exist: the
-    implementation must provide float.h, limits.h, stdarg.h and stddef.h and
-    libpng relies on limits.h and stddef.h existing and behaving as defined
-    (the other two required headers aren't used).  Non-ANSI systems that don't
-    have stddef.h or limits.h will have to provide an appropriate fake
-    containing the relevant types and #defines.
-  Dropped support for 16-bit platforms. The use of FAR/far has been eliminated
-    and the definition of png_alloc_size_t is now controlled by a flag so
-    that 'small size_t' systems can select it if necessary.  Libpng 1.6 may
-    not currently work on such systems -- it seems likely that it will
-    ask 'malloc' for more than 65535 bytes with any image that has a
-    sufficiently large row size (rather than simply failing to read such
-    images).
-  New tools directory containing tools used to generate libpng code.
-  Fixed race conditions in parallel make builds. With higher degrees of
-    parallelism during 'make' the use of the same temporary file names such
-    as 'dfn*' can result in a race where a temporary file from one arm of the
-    build is deleted or overwritten in another arm.  This changes the
-    temporary files for suffix rules to always use $* and ensures that the
-    non-suffix rules use unique file names.
-
-Version 1.6.0beta02 [December 21, 2011]
-  Correct configure builds where build and source directories are separate.
-    The include path of 'config.h' was erroneously made relative in pngvalid.c
-    in libpng 1.5.7.
-
-Version 1.6.0beta03 [December 22, 2011]
-  Start-up code size improvements, error handler flexibility. These changes
-    alter how the tricky allocation of the initial png_struct and png_info
-    structures are handled. png_info is now handled in pretty much the same
-    way as everything else, except that the allocations handle NULL return
-    silently.  png_struct is changed in a similar way on allocation and on
-    deallocation a 'safety' error handler is put in place (which should never
-    be required).  The error handler itself is changed to permit mismatches
-    in the application and libpng error buffer size; however, this means a
-    silent change to the API to return the jmp_buf if the size doesn't match
-    the size from the libpng compilation; libpng now allocates the memory and
-    this may fail.  Overall these changes result in slight code size
-    reductions; however, this is a reduction in code that is always executed
-    so is particularly valuable.  Overall on a 64-bit system the libpng DLL
-    decreases in code size by 1733 bytes.  pngerror.o increases in size by
-    about 465 bytes because of the new functionality.
-  Added png_convert_to_rfc1123_buffer() and deprecated png_convert_to_rfc1123()
-    to avoid including a spurious buffer in the png_struct.
-
-Version 1.6.0beta04 [December 30, 2011]
-  Regenerated configure scripts with automake-1.11.2
-  Eliminated png_info_destroy(). It is now used only in png.c and only calls
-    one other internal function and memset().
-  Enabled png_get_sCAL_fixed() if floating point APIs are enabled. Previously
-    it was disabled whenever internal fixed point arithmetic was selected,
-    which meant it didn't exist even on systems where FP was available but not
-    preferred.
-  Added pngvalid.c compile time checks for const APIs.
-  Implemented 'restrict' for png_info and png_struct. Because of the way
-    libpng works both png_info and png_struct are always accessed via a
-    single pointer.  This means adding C99 'restrict' to the pointer gives
-    the compiler some opportunity to optimize the code.  This change allows
-    that.
-  Moved AC_MSG_CHECKING([if libraries can be versioned]) later to the proper
-    location in configure.ac (Gilles Espinasse).
-  Changed png_memcpy to C assignment where appropriate. Changed all those
-    uses of png_memcpy that were doing a simple assignment to assignments
-    (all those cases where the thing being copied is a non-array C L-value).
-  Added some error checking to png_set_*() routines.
-  Removed the reference to the non-exported function png_memcpy() from
-    example.c.
-  Fixed the Visual C 64-bit build - it requires jmp_buf to be aligned, but
-    it had become misaligned.
-  Revised contrib/pngminus/pnm2png.c to avoid warnings when png_uint_32
-    and unsigned long are of different sizes.
-
-Version 1.6.0beta05 [January 15, 2012]
-  Updated manual with description of the simplified API (copied from png.h)
-  Fix bug in pngerror.c: some long warnings were being improperly truncated
-    (CVE-2011-3464, bug introduced in libpng-1.5.3beta05).
-
-Version 1.6.0beta06 [January 24, 2012]
-  Added palette support to the simplified APIs. This commit
-    changes some of the macro definitions in png.h, app code
-    may need corresponding changes.
-  Increased the formatted warning buffer to 192 bytes.
-  Added color-map support to simplified API. This is an initial version for
-    review; the documentation has not yet been updated.
-  Fixed Min/GW uninstall to remove libpng.dll.a
-
-Version 1.6.0beta07 [January 28, 2012]
-  Eliminated Intel icc/icl compiler warnings. The Intel (GCC derived)
-    compiler issues slightly different warnings from those issued by the
-    current versions of GCC. This eliminates those warnings by
-    adding/removing casts and small code rewrites.
-  Updated configure.ac from autoupdate: added --enable-werror option.
-    Also some layout regularization and removal of introduced tab characters
-    (replaced with 3-character indentation).  Obsolete macros identified by
-    autoupdate have been removed; the replacements are all in 2.59 so
-    the pre-req hasn't been changed.  --enable-werror checks for support
-    for -Werror (or the given argument) in the compiler.  This mimics the
-    gcc configure option by allowing -Werror to be turned on safely; without
-    the option the tests written in configure itself fail compilation because
-    they cause compiler warnings.
-  Rewrote autogen.sh to run autoreconf instead of running tools one-by-one.
-  Conditionalize the install rules for MINGW and CYGWIN in CMakeLists.txt and
-    set CMAKE_LIBRARY_OUTPUT_DIRECTORY to "lib" on all platforms (C. Yapp).
-  Freeze libtool files in the 'scripts' directory. This version of autogen.sh
-    attempts to dissuade people from running it when it is not, or should not,
-    be necessary.  In fact, autogen.sh does not work when run in a libpng
-    directory extracted from a tar distribution anymore. You must run it in
-    a GIT clone instead.
-  Added two images to contrib/pngsuite (1-bit and 2-bit transparent grayscale),
-    and renamed three whose names were inconsistent with those in
-    pngsuite/README.txt.
-
-Version 1.6.0beta08 [February 1, 2012]
-  Fixed Image::colormap misalignment in pngstest.c
-  Check libtool/libtoolize version number (2.4.2) in configure.ac
-  Divide test-pngstest.sh into separate pngstest runs for basic and
-    transparent images.
-  Moved automake options to AM_INIT_AUTOMAKE in configure.ac
-  Added color-tests, silent-rules (Not yet implemented in Makefile.am) and
-    version checking to configure.ac
-  Improved pngstest speed by not doing redundant tests and add const to
-    the background parameter of png_image_finish_read. The --background
-    option is now done automagically only when required, so that command-line
-    option no longer exists.
-  Cleaned up pngpriv.h to consistently declare all functions and data.
-    Also eliminated PNG_CONST_DATA, which is apparently not needed but we
-    can't be sure until it is gone.
-  Added symbol prefixing that allows all the libpng external symbols
-    to be prefixed (suggested by Reuben Hawkins).
-  Updated "ftbb*.png" list in the owatcom and vstudio projects.
-  Fixed 'prefix' builds on clean systems. The generation of pngprefix.h
-    should not require itself.
-  Updated INSTALL to explain that autogen.sh must be run in a GIT clone,
-    not in a libpng directory extracted from a tar distribution.
-
-Version 1.6.0beta09 [February 1, 2012]
-  Reverted the prebuilt configure files to libpng-1.6.0beta05 condition.
-
-Version 1.6.0beta10 [February 3, 2012]
-  Added Z_SOLO for zlib-1.2.6+ and correct pngstest tests
-  Updated list of test images in CMakeLists.txt
-  Updated the prebuilt configure files to current condition.
-  Revised INSTALL information about autogen.sh; it works in tar distributions.
-
-Version 1.6.0beta11 [February 16, 2012]
-  Fix character count in pngstest command in projects/owatcom/pngstest.tgt
-  Revised test-pngstest.sh to report PASS/FAIL for each image.
-  Updated documentation about the simplified API.
-  Corrected estimate of error in libpng png_set_rgb_to_gray API.  The API is
-    extremely inaccurate for sRGB conversions because it uses an 8-bit
-    intermediate linear value and it does not use the sRGB transform, so it
-    suffers from the known instability in gamma transforms for values close
-    to 0 (see Poynton).  The net result is that the calculation has a maximum
-    error of 14.99/255; 0.5/255^(1/2.2).  pngstest now uses 15 for the
-    permitted 8-bit error. This may still not be enough because of arithmetic
-    error.
-  Removed some unused arrays (with #ifdef) from png_read_push_finish_row().
-  Fixed a memory overwrite bug in simplified read of RGB PNG with
-    non-linear gamma Also bugs in the error checking in pngread.c and changed
-    quite a lot of the checks in pngstest.c to be correct; either correctly
-    written or not over-optimistic.  The pngstest changes are insufficient to
-    allow all possible RGB transforms to be passed; pngstest cmppixel needs
-    to be rewritten to make it clearer which errors it allows and then changed
-    to permit known inaccuracies.
-  Removed tests for no-longer-used *_EMPTY_PLTE_SUPPORTED from pngstruct.h
-  Fixed fixed/float API export conditionals. 1) If FIXED_POINT or
-    FLOATING_POINT options were switched off, png.h ended up with lone ';'
-    characters.  This is not valid ANSI-C outside a function.  The ';'
-    characters have been moved inside the definition of PNG_FP_EXPORT and
-    PNG_FIXED_EXPORT. 2) If either option was switched off, the declaration
-    of the corresponding functions were completely omitted, even though some
-    of them are still used internally.  The result is still valid, but
-    produces warnings from gcc with some warning options (including -Wall). The
-    fix is to cause png.h to declare the functions with PNG_INTERNAL_FUNCTION
-    when png.h is included from pngpriv.h.
-  Check for invalid palette index while reading paletted PNG.  When one is
-    found, issue a warning and increase png_ptr->num_palette accordingly.
-    Apps are responsible for checking to see if that happened.
-
-Version 1.6.0beta12 [February 18, 2012]
-  Do not increase num_palette on invalid_index.
-  Relocated check for invalid palette index to pngrtran.c, after unpacking
-    the sub-8-bit pixels.
-  Fixed CVE-2011-3026 buffer overrun bug.  This bug was introduced when
-    iCCP chunk support was added at libpng-1.0.6. Deal more correctly with the
-    test on iCCP chunk length. Also removed spurious casts that may hide
-    problems on 16-bit systems.
-
-Version 1.6.0beta13 [February 24, 2012]
-  Eliminated redundant png_push_read_tEXt|zTXt|iTXt|unknown code from
-    pngpread.c and use the sequential png_handle_tEXt, etc., in pngrutil.c;
-    now that png_ptr->buffer is inaccessible to applications, the special
-    handling is no longer useful.
-  Added PNG_SAFE_LIMITS feature to pnglibconf.dfa, pngpriv.h, and new
-    pngusr.dfa to reset the user limits to safe ones if PNG_SAFE_LIMITS is
-    defined.  To enable, use "CPPFLAGS=-DPNG_SAFE_LIMITS_SUPPORTED=1" on the
-    configure command or put #define PNG_SAFE_LIMITS_SUPPORTED in
-    pnglibconf.h.prebuilt and pnglibconf.h.
-
-Version 1.6.0beta14 [February 27, 2012]
-  Added information about the new limits in the manual.
-  Updated Makefile.in
-
-Version 1.6.0beta15 [March 2, 2012]
-  Removed unused "current_text" members of png_struct and the png_free()
-    of png_ptr->current_text from pngread.c
-  Rewrote pngstest.c for substantial speed improvement.
-  Fixed transparent pixel and 16-bit rgb tests in pngstest and removed a
-    spurious check in pngwrite.c
-  Added PNG_IMAGE_FLAG_FAST for the benefit of applications that store
-    intermediate files, or intermediate in-memory data, while processing
-    image data with the simplified API.  The option makes the files larger
-    but faster to write and read.  pngstest now uses this by default; this
-    can be disabled with the --slow option.
-  Improved pngstest fine tuning of error numbers, new test file generator.
-    The generator generates images that test the full range of sample values,
-    allow the error numbers in pngstest to be tuned and checked.  makepng
-    also allows generation of images with extra chunks, although this is
-    still work-in-progress.
-  Added check for invalid palette index while reading.
-  Fixed some bugs in ICC profile writing. The code should now accept
-    all potentially valid ICC profiles and reject obviously invalid ones.
-    It now uses png_error() to do so rather than casually writing a PNG
-    without the necessary color data.
-  Removed whitespace from the end of lines in all source files and scripts.
-
-Version 1.6.0beta16 [March 6, 2012]
-  Relocated palette-index checking function from pngrutil.c to pngtrans.c
-  Added palette-index checking while writing.
-  Changed png_inflate() and calling routines to avoid overflow problems.
-    This is an intermediate check-in that solves the immediate problems and
-    introduces one performance improvement (avoiding a copy via png_ptr->zbuf.)
-    Further changes will be made to make ICC profile handling more secure.
-  Fixed build warnings (MSVC, GCC, GCC v3). Cygwin GCC with default options
-    declares 'index' as a global, causing a warning if it is used as a local
-    variable.  GCC 64-bit warns about assigning a (size_t) (unsigned 64-bit)
-    to an (int) (signed 32-bit).  MSVC, however, warns about using the
-    unary '-' operator on an unsigned value (even though it is well defined
-    by ANSI-C to be ~x+1).  The padding calculation was changed to use a
-    different method.  Removed the tests on png_ptr->pass.
-  Added contrib/libtests/tarith.c to test internal arithmetic functions from
-    png.c. This is a libpng maintainer program used to validate changes to the
-    internal arithmetic functions.
-  Made read 'inflate' handling like write 'deflate' handling. The read
-    code now claims and releases png_ptr->zstream, like the write code.
-    The bug whereby the progressive reader failed to release the zstream
-    is now fixed, all initialization is delayed, and the code checks for
-    changed parameters on deflate rather than always calling
-    deflatedEnd/deflateInit.
-  Validate the zTXt strings in pngvalid.
-  Added code to validate the windowBits value passed to deflateInit2().
-    If the call to deflateInit2() is wrong a png_warning will be issued
-    (in fact this is harmless, but the PNG data produced may be sub-optimal).
-
-Version 1.6.0beta17 [March 10, 2012]
-  Fixed PNG_LIBPNG_BUILD_BASE_TYPE definition.
-  Reject all iCCP chunks after the first, even if the first one is invalid.
-  Deflate/inflate was reworked to move common zlib calls into single
-    functions [rw]util.c.  A new shared keyword check routine was also added
-    and the 'zbuf' is no longer allocated on progressive read.  It is now
-    possible to call png_inflate() incrementally.  A warning is no longer
-    issued if the language tag or translated keyword in the iTXt chunk
-    has zero length.
-  If benign errors are disabled use maximum window on ancillary inflate.
-    This works round a bug introduced in 1.5.4 where compressed ancillary
-    chunks could end up with a too-small windowBits value in the deflate
-    header.
-
-Version 1.6.0beta18 [March 16, 2012]
-  Issue a png_benign_error() instead of png_warning() about bad palette index.
-  In pngtest, treat benign errors as errors if "-strict" is present.
-  Fixed an off-by-one error in the palette index checking function.
-  Fixed a compiler warning under Cygwin (Windows-7, 32-bit system)
-  Revised example.c to put text strings in a temporary character array
-    instead of directly assigning string constants to png_textp members.
-    This avoids compiler warnings when -Wwrite-strings is enabled.
-  Added output flushing to aid debugging under Visual Studio. Unfortunately
-    this is necessary because the VS2010 output window otherwise simply loses
-    the error messages on error (they weren't flushed to the window before
-    the process exited, apparently!)
-  Added configuration support for benign errors and changed the read
-    default. Also changed some warnings in the iCCP and sRGB handling
-    from to benign errors. Configuration now makes read benign
-    errors warnings and write benign errors to errors by default (thus
-    changing the behavior on read).  The simplified API always forces
-    read benign errors to warnings (regardless of the system default, unless
-    this is disabled in which case the simplified API can't be built.)
-
-Version 1.6.0beta19 [March 18, 2012]
-  Work around for duplicate row start calls; added warning messages.
-    This turns on PNG_FLAG_DETECT_UNINITIALIZED to detect app code that
-    fails to call one of the 'start' routines (not enabled in libpng-1.5
-    because it is technically an API change, since it did normally work
-    before.)  It also makes duplicate calls to png_read_start_row (an
-    internal function called at the start of the image read) benign, as
-    they were before changes to use png_inflate_claim. Somehow webkit is
-    causing this to happen; this is probably a mis-feature in the zlib
-    changes so this commit is only a work-round.
-  Removed erroneous setting of DETECT_UNINITIALIZED and added more
-    checks. The code now does a png_error if an attempt is made to do the
-    row initialization twice; this is an application error and it has
-    serious consequences because the transform data in png_struct is
-    changed by each call.
-  Added application error reporting and added chunk names to read
-    benign errors; also added --strict to pngstest - not enabled
-    yet because a warning is produced.
-  Avoid the double gamma correction warning in the simplified API.
-    This allows the --strict option to pass in the pngstest checks
-
-Version 1.6.0beta20 [March 29, 2012]
-  Changed chunk handler warnings into benign errors, incrementally load iCCP
-  Added checksum-icc.c to contrib/tools
-  Prevent PNG_EXPAND+PNG_SHIFT doing the shift twice.
-  Recognize known sRGB ICC profiles while reading; prefer writing the
-    iCCP profile over writing the sRGB chunk, controlled by the
-    PNG_sRGB_PROFILE_CHECKS option.
-  Revised png_set_text_2() to avoid potential memory corruption (fixes
-    CVE-2011-3048, also known as CVE-2012-3425).
-
-Version 1.6.0beta21 [April 27, 2012]
-  Revised scripts/makefile.darwin: use system zlib; remove quotes around
-    architecture list; add missing ppc architecture; add architecture options
-    to shared library link; don't try to create a shared lib based on missing
-    RELEASE variable.
-  Enable png_set_check_for_invalid_index() for both read and write.
-  Removed #ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED in pngpriv.h around
-    declaration of png_handle_unknown().
-  Added -lssp_nonshared in a comment in scripts/makefile.freebsd
-    and changed deprecated NOOBJ and NOPROFILE to NO_OBJ and NO_PROFILE.
-
-Version 1.6.0beta22 [May 23, 2012]
-  Removed need for -Wno-cast-align with clang.  clang correctly warns on
-    alignment increasing pointer casts when -Wcast-align is passed. This
-    fixes the cases that clang warns about either by eliminating the
-    casts from png_bytep to png_uint_16p (pngread.c), or, for pngrutil.c
-    where the cast is previously verified or pngstest.c where it is OK, by
-    introducing new png_aligncast macros to do the cast in a way that clang
-    accepts.
-
-Version 1.6.0beta23 [June 6, 2012]
-  Revised CMakeLists.txt to not attempt to make a symlink under mingw.
-  Made fixes for new optimization warnings from gcc 4.7.0. The compiler
-    performs an optimization which is safe; however it then warns about it.
-    Changing the type of 'palette_number' in pngvalid.c removes the warning.
-  Do not depend upon a GCC feature macro being available for use in generating
-    the linker mapfile symbol prefix.
-  Improved performance of new do_check_palette_indexes() function (only
-    update the value when it actually increases, move test for whether
-    the check is wanted out of the function.
-
-Version 1.6.0beta24 [June 7, 2012]
-  Don't check palette indexes if num_palette is 0 (as it can be in MNG files).
-
-Version 1.6.0beta25 [June 16, 2012]
-  Revised png_set_keep_unknown_chunks() so num_chunks < 0 means ignore all
-    unknown chunks and all known chunks except for IHDR, PLTE, tRNS, IDAT,
-    and IEND.  Previously it only meant ignore all unknown chunks, the
-    same as num_chunks == 0. Revised png_image_skip_unused_chunks() to
-    provide a list of chunks to be processed instead of a list of chunks to
-    ignore.  Revised contrib/gregbook/readpng2.c accordingly.
-
-Version 1.6.0beta26 [July 10, 2012]
-  Removed scripts/makefile.cegcc from the *.zip and *.7z distributions; it
-    depends on configure, which is not included in those archives.
-  Moved scripts/chkfmt to contrib/tools.
-  Changed "a+w" to "u+w" in Makefile.in to fix CVE-2012-3386.
-
-Version 1.6.0beta27 [August 11, 2012]
-  Do not compile PNG_DEPRECATED, PNG_ALLOC and PNG_PRIVATE when __GNUC__ < 3.
-  Do not use __restrict when GNUC is <= 3.1
-  Removed references to png_zalloc() and png_zfree() from the manual.
-  Fixed configurations where floating point is completely disabled.  Because
-    of the changes to support symbol prefixing PNG_INTERNAL_FUNCTION declares
-    floating point APIs during libpng builds even if they are completely
-    disabled. This requires the png floating point types (png_double*) to be
-    declared even though the functions are never actually defined.  This
-    change provides a dummy definition so that the declarations work, yet any
-    implementation will fail to compile because of an incomplete type.
-  Re-eliminated the use of strcpy() in pngtest.c.  An unnecessary use of
-    strcpy() was accidentally re-introduced in libpng16; this change replaces
-    it with strncpy().
-  Eliminated use of png_sizeof(); use sizeof() instead.
-  Use a consistent style for (sizeof type) and (sizeof (array))
-  Cleanup of png_set_filler().  This function does very different things on
-    read and write.  In libpng 1.6 the two cases can be distinguished and
-    considerable code cleanup, and extra error checking, is possible.  This
-    makes calls on the write side that have no effect be ignored with a
-    png_app_error(), which can be disabled in the app using
-    png_set_benign_errors(), and removes the spurious use of usr_channels
-    on the read side.
-  Insist on autotools 1.12.1 for git builds because there are security issues
-    with 1.12 and insisting on anything less would allow 1.12 to be used.
-  Removed info_ptr->signature[8] from WRITE-only builds.
-  Add some conditions for compiling png_fixed().  This is a small function
-    but it requires "-lm" on some platforms.
-  Cause pngtest --strict to fail on any warning from libpng (not just errors)
-    and cause it not to fail at the comparison step if libpng lacks support
-    for writing chunks that it reads from the input (currently only implemented
-    for compressed text chunks).
-  Make all three "make check" test programs work without READ or WRITE support.
-    Now "make check" will succeed even if libpng is compiled with -DPNG_NO_READ
-    or -DPNG_NO_WRITE.  The tests performed are reduced, but the basic reading
-    and writing of a PNG file is always tested by one or more of the tests.
-  Consistently use strlen(), memset(), memcpy(), and memcmp() instead of the
-    png_strlen(), png_memset(), png_memcpy(), and png_memcmp() macros.
-  Removed the png_sizeof(), png_strlen(), png_memset(), png_memcpy(), and
-    png_memcmp() macros.
-  Work around gcc 3.x and Microsoft Visual Studio 2010 complaints. Both object
-    to the split initialization of num_chunks.
-
-Version 1.6.0beta28 [August 29, 2012]
-  Unknown handling fixes and clean up. This adds more correct option
-    control of the unknown handling, corrects the pre-existing bug where
-    the per-chunk 'keep' setting is ignored and makes it possible to skip
-    IDAT chunks in the sequential reader (broken in earlier 1.6 versions).
-    There is a new test program, test-unknown.c, which is a work in progress
-    (not currently part of the test suite).  Comments in the header files now
-    explain how the unknown handling works.
-  Allow fine grain control of unknown chunk APIs. This change allows
-    png_set_keep_unknown_chunks() to be turned off if not required and causes
-    both read and write to behave appropriately (on read this is only possible
-    if the user callback is used to handle unknown chunks).  The change
-    also removes the support for storing unknown chunks in the info_struct
-    if the only unknown handling enabled is via the callback, allowing libpng
-    to be configured with callback reading and none of the unnecessary code.
-  Corrected fix for unknown handling in pngtest. This reinstates the
-    libpng handling of unknown chunks other than vpAg and sTER (including
-    unsafe-to-copy chunks which were dropped before) and eliminates the
-    repositioning of vpAg and sTER in pngtest.png by changing pngtest.png
-    (so the chunks are where libpng would put them).
-  Added "tunknown" test and corrected a logic error in png_handle_unknown()
-    when SAVE support is absent.  Moved the shell test scripts for
-    contrib/libtests from the libpng top directory to contrib/libtests.
-    png_handle_unknown() must always read or skip the chunk, if
-    SAVE_UNKNOWN_CHUNKS is turned off *and* the application does not set
-    a user callback an unknown chunk will not be read, leading to a read
-    error, which was revealed by the "tunknown" test.
-  Cleaned up and corrected ICC profile handling.
-    contrib/libtests/makepng: corrected 'rgb' and 'gray' cases.  profile_error
-    messages could be truncated; made a correct buffer size calculation and
-    adjusted pngerror.c appropriately. png_icc_check_* checking improved;
-    changed the functions to receive the correct color type of the PNG on read
-    or write and check that it matches the color space of the profile (despite
-    what the comments said before, there is danger in assuming the app will
-    cope correctly with an RGB profile on a grayscale image and, since it
-    violates the PNG spec, allowing it is certain to produce inconsistent
-    app behavior and might even cause app crashes.) Check that profiles
-    contain the tags needed to process the PNG (tags all required by the ICC
-    spec). Removed unused PNG_STATIC from pngpriv.h.
-
-Version 1.6.0beta29 [September 4, 2012]
-  Fixed the simplified API example programs to add the *colormap parameter
-    to several of he API and improved the error message if the version field
-    is not set.
-  Added contrib/examples/* to the *.zip and *.7z distributions.
-  Updated simplified API synopses and description of the png_image structure
-    in the manual.
-  Made makepng and pngtest produce identical PNGs, add "--relaxed" option
-    to pngtest. The "--relaxed" option turns off the benign errors that are
-    enabled by default in pre-RC builds. makepng can now write ICC profiles
-    where the length has not been extended to a multiple of 4, and pngtest
-    now intercepts all libpng errors, allowing the previously-introduced
-    "--strict test" on no warnings to actually work.
-  Improved ICC profile handling including cHRM chunk generation and fixed
-    Cygwin+MSVC build errors. The ICC profile handling now includes more
-    checking.  Several errors that caused rejection of the profile are now
-    handled with a warning in such a way that the invalid profiles will be
-    read by default in release (but not pre-RC) builds but will not be
-    written by default.  The easy part of handling the cHRM chunk is written,
-    where the ICC profile contains the required data.  The more difficult
-    part plus guessing a gAMA value requires code to pass selected RGB values
-    through the profile.
-
-Version 1.6.0beta30 [October 24, 2012]
-  Changed ICC profile matrix/vector types to not depend on array type rules.
-    By the ANSI-C standard the new types should be identical to the previous
-    versions, and all known versions of gcc tested with the previous versions
-    except for GCC-4.2.1 work with this version.  The change makes the ANSI-C
-    rule that const applied to an array of elements applies instead to the
-    elements in the array moot by explicitly applying const to the base
-    elements of the png_icc_matrix and png_icc_vector types. The accidental
-    (harmless) 'const' previously applied to the parameters of two of the
-    functions have also been removed.
-  Added a work around for GCC 4.2 optimization bug.
-  Marked the broken (bad white point) original HP sRGB profiles correctly and
-    correct comments.
-  Added -DZ_SOLO to contrib/pngminim/*/makefile to work with zlib-1.2.7
-  Use /MDd for vstudio debug builds. Also added pngunkown to the vstudio
-    builds, fixed build errors and corrected a minor exit code error in
-    pngvalid if the 'touch' file name is invalid.
-  Add updated WARNING file to projects/vstudio from libpng 1.5/vstudio
-  Fixed build when using #define PNG_NO_READ_GAMMA in png_do_compose() in
-    pngrtran.c (Domani Hannes).
-
-Version 1.6.0beta31 [November 1, 2012]
-  Undid the erroneous change to vstudio/pngvalid build in libpng-1.6.0beta30.
-  Made pngvalid so that it will build outside the libpng source tree.
-  Made builds -DPNG_NO_READ_GAMMA compile (the unit tests still fail).
-  Made PNG_NO_READ_GAMMA switch off interfaces that depend on READ_GAMMA.
-    Prior to 1.6.0 switching off READ_GAMMA did unpredictable things to the
-    interfaces that use it (specifically, png_do_background in 1.4 would
-    simply display composite for grayscale images but do composition
-    with the incorrect arithmetic for color ones). In 1.6 the semantic
-    of -DPNG_NO_READ_GAMMA is changed to simply disable any interface that
-    depends on it; this obliges people who set it to consider whether they
-    really want it off if they happen to use any of the interfaces in
-    question (typically most users who disable it won't).
-  Fixed GUIDs in projects/vstudio. Some were duplicated or missing,
-    resulting in VS2010 having to update the files.
-  Removed non-working ICC profile support code that was mostly added to
-    libpng-1.6.0beta29 and beta30. There was too much code for too little
-    gain; implementing full ICC color correction may be desirable but is left
-    up to applications.
-
-Version 1.6.0beta32 [November 25, 2012]
-  Fixed an intermittent SEGV in pngstest due to an uninitialized array element.
-  Added the ability for contrib/libtests/makepng.c to make a PNG with just one
-    color. This is useful for debugging pngstest color inaccuracy reports.
-  Fixed error checking in the simplified write API (Olaf van der Spek)
-  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
-
-Version 1.6.0beta33 [December 15, 2012]
-  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
-    that causes the MALLOC_MAX limit not to work (John Bowler)
-  Change png_warning() to png_app_error() in pngwrite.c and comment the
-    fall-through condition.
-  Change png_warning() to png_app_warning() in png_write_tRNS().
-  Rearranged the ARM-NEON optimizations: Isolated the machine specific code
-    to the hardware subdirectory and added comments to pngrutil.c so that
-    implementors of other optimizations know what to do.
-  Fixed cases of unquoted DESTDIR in Makefile.am
-  Rebuilt Makefile.in, etc., with autoconf-2.69 and automake-1.12.5.
-
-Version 1.6.0beta34 [December 19, 2012]
-  Cleaned up whitespace in the synopsis portion of the manpage "libpng.3"
-  Disassembled the version number in scripts/options.awk (necessary for
-    building on SunOs).
-
-Version 1.6.0beta35 [December 23, 2012]
-  Made default Zlib compression settings be configurable. This adds #defines to
-    pnglibconf.h to control the defaults.
-  Fixed Windows build issues, enabled ARM compilation. Various warnings issued
-    by earlier versions of GCC fixed for Cygwin and Min/GW (which both use old
-    GCCs.) ARM support is enabled by default in zlib.props (unsupported by
-    Microsoft) and ARM compilation is made possible by deleting the check for
-    x86. The test programs cannot be run because they are not signed.
-
-Version 1.6.0beta36 [January 2, 2013]
-  Discontinued distributing libpng-1.x.x.tar.bz2.
-  Discontinued distributing libpng-1.7.0-1.6.0-diff.txt and similar.
-  Rebuilt configure with autoconf-2.69 (inadvertently not done in beta33)
-  Fixed 'make distcheck' on SUN OS - libpng.so was not being removed
-
-Version 1.6.0beta37 [January 10, 2013]
-  Fixed conceivable but difficult to repro overflow. Also added two test
-    programs to generate and test a PNG which should have the problem.
-
-Version 1.6.0beta39 [January 19, 2013]
-  Again corrected attempt at overflow detection in png_set_unknown_chunks()
-  (CVE-2013-7353).  Added overflow detection in png_set_sPLT() and
-  png_set_text_2() (CVE-2013-7354).
-
-Version 1.6.0beta40 [January 20, 2013]
-  Use consistent handling of overflows in text, sPLT and unknown png_set_* APIs
-
-Version 1.6.0rc01 [January 26, 2013]
-  No changes.
-
-Version 1.6.0rc02 [February 4, 2013]
-  Added png_get_palette_max() function.
-
-Version 1.6.0rc03 [February 5, 2013]
-  Fixed the png_get_palette_max API.
-
-Version 1.6.0rc04 [February 7, 2013]
-  Turn serial tests back on (recently turned off by autotools upgrade).
-
-Version 1.6.0rc05 [February 8, 2013]
-  Update manual about png_get_palette_max().
-
-Version 1.6.0rc06 [February 9, 2013]
-  Fixed missing dependency in --prefix builds The intermediate
-    internal 'prefix.h' file can only be generated correctly after
-    pnglibconf.h, however the dependency was not in Makefile.am.  The
-    symptoms are unpredictable depending on the order make chooses to
-    build pngprefix.h and pnglibconf.h, often the error goes unnoticed
-    because there is a system pnglibconf.h to use instead.
-
-Version 1.6.0rc07 [February 10, 2013]
-  Enclosed the new png_get_palette_max in #ifdef PNG_GET_PALETTE_MAX_SUPPORTED
-    block, and revised pnglibconf.h and pnglibconf.h.prebuilt accordingly.
-
-Version 1.6.0rc08 [February 10, 2013]
-  Fix typo in png.h #ifdef
-
-Version 1.6.0 [February 14, 2013]
-  No changes.
-
-Version 1.6.1beta01 [February 16, 2013]
-  Made symbol prefixing work with the ARM neon optimizations. Also allow
-    pngpriv.h to be included for preprocessor definitions only, so it can
-    be used in non-C/C++ files. Back ported from libpng 1.7.
-  Made sRGB check numbers consistent.
-  Ported libpng 1.5 options.awk/dfn file handling to 1.6, fixed one bug.
-  Removed cc -E workround, corrected png_get_palette_max API Tested on
-    SUN OS cc 5.9, which demonstrates the tokenization problem previously
-    avoided by using /lib/cpp.  Since all .dfn output is now protected in
-    double quotes unless it is to be macro substituted the fix should
-    work everywhere.
-  Enabled parallel tests - back ported from libpng-1.7.
-  scripts/pnglibconf.dfa formatting improvements back ported from libpng17.
-  Fixed a race condition in the creation of the build 'scripts' directory
-    while building with a parallel make.
-  Use approved/supported Android method to check for NEON, use Linux/POSIX
-    1003.1 API to check /proc/self/auxv avoiding buffer allocation and other
-    library calls (ported from libpng15).
-
-Version 1.6.1beta02 [February 19, 2013]
-  Use parentheses more consistently in "#if defined(MACRO)" tests.
-  Folded long lines.
-  Reenabled code to allow zero length PLTE chunks for MNG.
-
-Version 1.6.1beta03 [February 22, 2013]
-  Fixed ALIGNED_MEMORY support.
-  Added a new configure option:
-    --enable-arm-neon=always will stop the run-time checks. New checks
-    within arm/arm_init.c will cause the code not to be compiled unless
-    __ARM_NEON__ is set. This should make it fail safe (if someone asks
-    for it on then the build will fail if it can't be done.)
-  Updated the INSTALL document.
-
-Version 1.6.1beta04 [February 27, 2013]
-  Revised INSTALL to recommend using CPPFLAGS instead of INCLUDES.
-  Revised scripts/makefile.freebsd to respect ZLIBLIB and ZLIBINC.
-  Revised scripts/dfn.awk to work with the buggy MSYS awk that has trouble
-    with CRLF line endings.
-
-Version 1.6.1beta05 [March 1, 2013]
-  Avoid a possible memory leak in contrib/gregbook/readpng.c
-
-Version 1.6.1beta06 [March 4, 2013]
-  Better documentation of unknown handling API interactions.
-  Corrected Android builds and corrected libpng.vers with symbol
-    prefixing.  It also makes those tests compile and link on Android.
-  Added an API png_set_option() to set optimization options externally,
-    providing an alternative and general solution for the non-portable
-    run-time tests used by the ARM Neon code, using the PNG_ARM_NEON option.
-  The order of settings vs options in pnglibconf.h is reversed to allow
-    settings to depend on options and options can now set (or override) the
-    defaults for settings.
-
-Version 1.6.1beta07 [March 7, 2013]
-  Corrected simplified API default gamma for color-mapped output, added
-    a flag to change default. In 1.6.0 when the simplified API was used
-    to produce color-mapped output from an input image with no gamma
-    information the gamma assumed for the input could be different from
-    that assumed for non-color-mapped output.  In particular 16-bit depth
-    input files were assumed to be sRGB encoded, whereas in the 'direct'
-    case they were assumed to have linear data.  This was an error.  The
-    fix makes the simplified API treat all input files the same way and
-    adds a new flag to the png_image::flags member to allow the
-    application/user to specify that 16-bit files contain sRGB data
-    rather than the default linear.
-  Fixed bugs in the pngpixel and makepng test programs.
-
-Version 1.6.1beta08 [March 7, 2013]
-  Fixed CMakelists.txt to allow building a single variant of the library
-    (Claudio Bley):
-  Introduced a PNG_LIB_TARGETS variable that lists all activated library
-    targets.  It is an error if this variable ends up empty, ie. you have
-    to build at least one library variant.
-  Made the *_COPY targets only depend on library targets actually being build.
-  Use PNG_LIB_TARGETS to unify a code path.
-  Changed the CREATE_SYMLINK macro to expect the full path to a file as the
-    first argument. When symlinking the filename component of that path is
-    determined and used as the link target.
-  Use copy_if_different in the CREATE_SYMLINK macro.
-
-Version 1.6.1beta09 [March 13, 2013]
-  Eliminated two warnings from the Intel C compiler. The warnings are
-    technically valid, although a reasonable treatment of division would
-    show it to be incorrect.
-
-Version 1.6.1rc01 [March 21, 2013]
-  No changes.
-
-Version 1.6.1 [March 28, 2013]
-  No changes.
-
-Version 1.6.2beta01 [April 14, 2013]
-  Updated documentation of 1.5.x to 1.6.x changes in iCCP chunk handling.
-  Fixed incorrect warning of excess deflate data. End condition - the
-    warning would be produced if the end of the deflate stream wasn't read
-    in the last row.  The warning is harmless.
-  Corrected the test on user transform changes on read. It was in the
-    png_set of the transform function, but that doesn't matter unless the
-    transform function changes the rowbuf size, and that is only valid if
-    transform_info is called.
-  Corrected a misplaced closing bracket in contrib/libtests/pngvalid.c
-    (Flavio Medeiros).
-  Corrected length written to uncompressed iTXt chunks (Samuli Suominen).
-    Bug was introduced in libpng-1.6.0.
-
-Version 1.6.2rc01 [April 18, 2013]
-  Added contrib/tools/fixitxt.c, to repair the erroneous iTXt chunk length
-    written by libpng-1.6.0 and 1.6.1.
-  Disallow storing sRGB information when the sRGB is not supported.
-
-Version 1.6.2rc02 [April 18, 2013]
-  Merge pngtest.c with libpng-1.7.0
-
-Version 1.6.2rc03 [April 22, 2013]
-  Trivial spelling cleanup.
-
-Version 1.6.2rc04 and 1.6.2rc05 [omitted]
-
-Version 1.6.2rc06 [April 24, 2013]
-  Reverted to version 1.6.2rc03.  Recent changes to arm/neon support
-    have been ported to libpng-1.7.0beta09 and will reappear in version
-    1.6.3beta01.
-
-Version 1.6.2 [April 25, 2013]
-  No changes.
-
-Version 1.6.3beta01 [April 25, 2013]
-  Revised stack marking in arm/filter_neon.S and configure.ac.
-  Ensure that NEON filter stuff is completely disabled when switched 'off'.
-    Previously the ARM NEON specific files were still built if the option
-    was switched 'off' as opposed to being explicitly disabled.
-
-Version 1.6.3beta02 [April 26, 2013]
-  Test for 'arm*' not just 'arm' in the host_cpu configure variable.
-  Rebuilt the configure scripts.
-
-Version 1.6.3beta03 [April 30, 2013]
-  Expanded manual paragraph about writing private chunks, particularly
-    the need to call png_set_keep_unknown_chunks() when writing them.
-  Avoid dereferencing NULL pointer possibly returned from
-    png_create_write_struct() (Andrew Church).
-
-Version 1.6.3beta05 [May 9, 2013]
-  Calculate our own zlib windowBits when decoding rather than trusting the
-    CMF bytes in the PNG datastream.
-  Added an option to force maximum window size for inflating, which was
-    the behavior of libpng15 and earlier, via a new PNG_MAXIMUM_INFLATE_WINDOW
-    option for png_set_options().
-  Added png-fix-itxt and png-fix-too-far-back to the built programs and
-    removed warnings from the source code and timepng that are revealed as
-    a result.
-  Detect wrong libpng versions linked to png-fix-too-far-back, which currently
-    only works with libpng versions that can be made to reliably fail when
-    the deflate data contains an out-of-window reference.  This means only
-    1.6 and later.
-  Fixed gnu issues: g++ needs a static_cast, gcc 4.4.7 has a broken warning
-    message which it is easier to work round than ignore.
-  Updated contrib/pngminus/pnm2png.c (Paul Stewart):
-    Check for EOF
-    Ignore "#" delimited comments in input file to pnm2png.c.
-    Fixed whitespace handling
-    Added a call to png_set_packing()
-    Initialize dimension values so if sscanf fails at least we have known
-      invalid values.
-  Attempt to detect configuration issues with png-fix-too-far-back, which
-    requires both the correct libpng and the correct zlib to function
-    correctly.
-  Check ZLIB_VERNUM for mismatches, enclose #error in quotes
-  Added information in the documentation about problems with and fixes for
-    the bad CRC and bad iTXt chunk situations.
-
-Version 1.6.3beta06 [May 12, 2013]
-  Allow contrib/pngminus/pnm2png.c to compile without WRITE_INVERT and
-    WRITE_PACK supported (writes error message that it can't read P1 or
-    P4 PBM files).
-  Improved png-fix-too-far-back usage message, added --suffix option.
-  Revised contrib/pngminim/*/makefile to generate pnglibconf.h with the
-    right zlib header files.
-  Separated CPPFLAGS and CFLAGS in contrib/pngminim/*/makefile
-
-Version 1.6.3beta07 [June 8, 2013]
-  Removed a redundant test in png_set_IHDR().
-  Added set(CMAKE_CONFIGURATION_TYPES ...) to CMakeLists.txt (Andrew Hundt)
-  Deleted set(CMAKE_BUILD_TYPE) block from CMakeLists.txt
-  Enclose the prototypes for the simplified write API in
-    #ifdef PNG_STDIO_SUPPORTED/#endif
-  Make ARM NEON support work at compile time (not just configure time).
-    This moves the test on __ARM_NEON__ into pngconf.h to avoid issues when
-    using a compiler that compiles for multiple architectures at one time.
-  Removed PNG_FILTER_OPTIMIZATIONS and PNG_ARM_NEON_SUPPORTED from
-    pnglibconf.h, allowing more of the decisions to be made internally
-    (pngpriv.h) during the compile.  Without this, symbol prefixing is broken
-    under certain circumstances on ARM platforms.  Now only the API parts of
-    the optimizations ('check' vs 'api') are exposed in the public header files
-    except that the new setting PNG_ARM_NEON_OPT documents how libpng makes the
-    decision about whether or not to use the optimizations.
-  Protect symbol prefixing against CC/CPPFLAGS/CFLAGS usage.
-    Previous iOS/Xcode fixes for the ARM NEON optimizations moved the test
-    on __ARM_NEON__ from configure time to compile time.  This breaks symbol
-    prefixing because the definition of the special png_init_filter_functions
-    call was hidden at configure time if the relevant compiler arguments are
-    passed in CFLAGS as opposed to CC.  This change attempts to avoid all
-    the confusion that would result by declaring the init function even when
-    it is not used, so that it will always get prefixed.
-
-Version 1.6.3beta08 [June 18, 2013]
-  Revised libpng.3 so that "doclifter" can process it.
-
-Version 1.6.3beta09 [June 27, 2013]
-  Revised example.c to illustrate use of PNG_DEFAULT_sRGB and PNG_GAMMA_MAC_18
-    as parameters for png_set_gamma().  These have been available since
-    libpng-1.5.4.
-  Renamed contrib/tools/png-fix-too-far-back.c to pngfix.c and revised it
-    to check all compressed chunks known to libpng.
-
-Version 1.6.3beta10 [July 5, 2013]
-  Updated documentation to show default behavior of benign errors correctly.
-  Only compile ARM code when PNG_READ_SUPPORTED is defined.
-  Fixed undefined behavior in contrib/tools/pngfix.c and added new strip
-    option. pngfix relied on undefined behavior and even a simple change from
-    gcc to g++ caused it to fail.  The new strip option 'unsafe' has been
-    implemented and is the default if --max is given.  Option names have
-    been clarified, with --strip=transform now stripping the bKGD chunk,
-    which was stripped previously with --strip=unused.
-  Added all documented chunk types to pngpriv.h
-  Unified pngfix.c source with libpng17.
-
-Version 1.6.3rc01 [July 11, 2013]
-  No changes.
-
-Version 1.6.3 [July 18, 2013]
-  Revised manual about changes in iTXt chunk handling made in libpng-1.6.0.
-  Added "/* SAFE */" comments in pngrutil.c and pngrtran.c where warnings
-    may be erroneously issued by code-checking applications.
-
-Version 1.6.4beta01 [August 21, 2013]
-  Added information about png_set_options() to the manual.
-  Delay calling png_init_filter_functions() until a row with nonzero filter
-    is found.
-
-Version 1.6.4beta02 [August 30, 2013]
-  Fixed inconsistent conditional compilation of png_chunk_unknown_handling()
-    prototype, definition, and usage.  Made it depend on
-    PNG_HANDLE_AS_UNKNOWN_SUPPORTED everywhere.
-
-Version 1.6.4rc01 [September 5, 2013]
-  No changes.
-
-Version 1.6.4 [September 12, 2013]
-  No changes.
-
-Version 1.6.5 [September 14, 2013]
-  Removed two stray lines of code from arm/arm_init.c.
-
-Version 1.6.6 [September 16, 2013]
-  Removed two stray lines of code from arm/arm_init.c, again.
-
-Version 1.6.7beta01 [September 30, 2013]
-  Revised unknown chunk code to correct several bugs in the NO_SAVE_/NO_WRITE
-    combination
-  Allow HANDLE_AS_UNKNOWN to work when other options are configured off. Also
-    fixed the pngminim makefiles to work when $(MAKEFLAGS) contains stuff
-    which terminates the make options (as by default in recent versions of
-    Gentoo).
-  Avoid up-cast warnings in pngvalid.c. On ARM the alignment requirements of
-    png_modifier are greater than that of png_store and as a consequence
-    compilation of pngvalid.c results in a warning about increased alignment
-    requirements because of the bare cast to (png_modifier*). The code is safe,
-    because the pointer is known to point to a stack allocated png_modifier,
-    but this change avoids the warning.
-  Fixed default behavior of ARM_NEON_API. If the ARM NEON API option was
-    compiled without the CHECK option it defaulted to on, not off.
-  Check user callback behavior in pngunknown.c. Previous versions compiled
-    if SAVE_UNKNOWN was not available but did nothing since the callback
-    was never implemented.
-  Merged pngunknown.c with 1.7 version and back ported 1.7 improvements/fixes
-
-Version 1.6.7beta02 [October 12, 2013]
-  Made changes for compatibility with automake 1.14:
-    1) Added the 'compile' program to the list of programs that must be cleaned
-       in autogen.sh
-    2) Added 'subdir-objects' which causes .c files in sub-directories to be
-       compiled such that the corresponding .o files are also in the
-       sub-directory.  This is because automake 1.14 warns that the
-       current behavior of compiling to the top level directory may be removed
-       in the future.
-    3) Updated dependencies on pnglibconf.h to match the new .o locations and
-       added all the files in contrib/libtests and contrib/tools that depend
-       on pnglibconf.h
-    4) Added 'BUILD_SOURCES = pnglibconf.h'; this is the automake recommended
-       way of handling the dependencies of sources that are machine generated;
-       unfortunately it only works if the user does 'make all' or 'make check',
-       so the dependencies (3) are still required.
-  Cleaned up (char*) casts of zlib messages. The latest version of the Intel C
-    compiler complains about casting a string literal as (char*), so copied the
-    treatment of z_const from the library code into pngfix.c
-  Simplified error message code in pngunknown. The simplification has the
-    useful side effect of avoiding a bogus warning generated by the latest
-    version of the Intel C compiler (it objects to
-    condition ? string-literal : string-literal).
-  Make autogen.sh work with automake 1.13 as well as 1.14. Do this by always
-    removing the 1.14 'compile' script but never checking for it.
-
-Version 1.6.7beta03 [October 19, 2013]
-  Added ARMv8 support (James Yu <james.yu at linaro.org>).  Added file
-    arm/filter_neon_intrinsics.c; enable with -mfpu=neon.
-  Revised pngvalid to generate size images with as many filters as it can
-    manage, limited by the number of rows.
-  Cleaned up ARM NEON compilation handling. The tests are now in pngpriv.h
-    and detect the broken GCC compilers.
-
-Version 1.6.7beta04 [October 26, 2013]
-  Allow clang derived from older GCC versions to use ARM intrinsics. This
-    causes all clang builds that use -mfpu=neon to use the intrinsics code,
-    not the assembler code.  This has only been tested on iOS 7. It may be
-    necessary to exclude some earlier clang versions but this seems unlikely.
-  Changed NEON implementation selection mechanism. This allows assembler
-    or intrinsics to be turned on at compile time during the build by defining
-    PNG_ARM_NEON_IMPLEMENTATION to the correct value (2 or 1).  This macro
-    is undefined by default and the build type is selected in pngpriv.h.
-
-Version 1.6.7rc01 [November 2, 2013]
-  No changes.
-
-Version 1.6.7rc02 [November 7, 2013]
-  Fixed #include in filter_neon_intrinsics.c and ctype macros. The ctype char
-    checking macros take an unsigned char argument, not a signed char.
-
-Version 1.6.7 [November 14, 2013]
-  No changes.
-
-Version 1.6.8beta01 [November 24, 2013]
-  Moved prototype for png_handle_unknown() in pngpriv.h outside of
-    the #ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED/#endif block.
-  Added "-Wall" to CFLAGS in contrib/pngminim/*/makefile
-  Conditionally compile some unused functions reported by -Wall in
-    pngminim.
-  Fixed 'minimal' builds. Various obviously useful minimal configurations
-    don't build because of missing contrib/libtests test programs and
-    overly complex dependencies in scripts/pnglibconf.dfa. This change
-    adds contrib/conftest/*.dfa files that can be used in automatic build
-    scripts to ensure that these configurations continue to build.
-  Enabled WRITE_INVERT and WRITE_PACK in contrib/pngminim/encoder.
-  Fixed pngvalid 'fail' function declaration on the Intel C Compiler.
-    This reverts to the previous 'static' implementation and works round
-    the 'unused static function' warning by using PNG_UNUSED().
-
-Version 1.6.8beta02 [November 30, 2013]
-  Removed or marked PNG_UNUSED some harmless "dead assignments" reported
-    by clang scan-build.
-  Changed tabs to 3 spaces in png_debug macros and changed '"%s"m'
-    to '"%s" m' to improve portability among compilers.
-  Changed png_free_default() to free() in pngtest.c
-
-Version 1.6.8rc01 [December 12, 2013]
-  Tidied up pngfix inits and fixed pngtest no-write builds.
-
-Version 1.6.8rc02 [December 14, 2013]
-  Handle zero-length PLTE chunk or NULL palette with png_error()
-    instead of png_chunk_report(), which by default issues a warning
-    rather than an error, leading to later reading from a NULL pointer
-    (png_ptr->palette) in png_do_expand_palette(). This is CVE-2013-6954
-    and VU#650142.  Libpng-1.6.1 through 1.6.7 are vulnerable.
-    Libpng-1.6.0 and earlier do not have this bug.
-
-Version 1.6.8 [December 19, 2013]
-  No changes.
-
-Version 1.6.9beta01 [December 26, 2013]
-  Bookkeeping: Moved functions around (no changes). Moved transform
-    function definitions before the place where they are called so that
-    they can be made static. Move the intrapixel functions and the
-    grayscale palette builder out of the png?tran.c files. The latter
-    isn't a transform function and is no longer used internally, and the
-    former MNG specific functions are better placed in pngread/pngwrite.c
-  Made transform implementation functions static. This makes the internal
-    functions called by png_do_{read|write}_transformations static. On an
-    x86-64 DLL build (Gentoo Linux) this reduces the size of the text
-    segment of the DLL by 1208 bytes, about 0.6%. It also simplifies
-    maintenance by removing the declarations from pngpriv.h and allowing
-    easier changes to the internal interfaces.
-  Rebuilt configure scripts with automake-1.14.1 and autoconf-2.69
-    in the tar distributions.
-
-Version 1.6.9beta02 [January 1, 2014]
-  Added checks for libpng 1.5 to pngvalid.c.  This supports the use of
-    this version of pngvalid in libpng 1.5
-  Merged with pngvalid.c from libpng-1.7 changes to create a single
-    pngvalid.c
-  Removed #error macro from contrib/tools/pngfix.c (Thomas Klausner).
-  Merged pngrio.c, pngtrans.c, pngwio.c, and pngerror.c with libpng-1.7.0
-  Merged libpng-1.7.0 changes to make no-interlace configurations work
-    with test programs.
-  Revised pngvalid.c to support libpng 1.5, which does not support the
-    PNG_MAXIMUM_INFLATE_WINDOW option, so #define it out when appropriate in
-    pngvalid.c
-  Allow unversioned links created on install to be disabled in configure.
-    In configure builds 'make install' changes/adds links like png.h
-    and libpng.a to point to the newly installed, versioned, files (e.g.
-    libpng17/png.h and libpng17.a). Three new configure options and some
-    rearrangement of Makefile.am allow creation of these links to be disabled.
-
-Version 1.6.9beta03 [January 10, 2014]
-  Removed potentially misleading warning from png_check_IHDR().
-
-Version 1.6.9beta04 [January 20, 2014]
-  Updated scripts/makefile.* to use CPPFLAGS (Cosmin).
-  Added clang attribute support (Cosmin).
-
-Version 1.6.9rc01 [January 28, 2014]
-  No changes.
-
-Version 1.6.9rc02 [January 30, 2014]
-  Quiet an uninitialized memory warning from VC2013 in png_get_png().
-
-Version 1.6.9 [February 6, 2014]
-
-Version 1.6.10beta01 [February 9, 2014]
-  Backported changes from libpng-1.7.0beta30 and beta31:
-  Fixed a large number of instances where PNGCBAPI was omitted from
-    function definitions.
-  Added pngimage test program for png_read_png() and png_write_png()
-    with two new test scripts.
-  Removed dependence on !PNG_READ_EXPAND_SUPPORTED for calling
-    png_set_packing() in png_read_png().
-  Fixed combination of ~alpha with shift. On read invert alpha, processing
-    occurred after shift processing, which causes the final values to be
-    outside the range that should be produced by the shift. Reversing the
-    order on read makes the two transforms work together correctly and mirrors
-    the order used on write.
-  Do not read invalid sBIT chunks. Previously libpng only checked sBIT
-    values on write, so a malicious PNG writer could therefore cause
-    the read code to return an invalid sBIT chunk, which might lead to
-    application errors or crashes.  Such chunks are now skipped (with
-    chunk_benign_error).
-  Make png_read_png() and png_write_png() prototypes in png.h depend
-    upon PNG_READ_SUPPORTED and PNG_WRITE_SUPPORTED.
-  Support builds with unsupported PNG_TRANSFORM_* values.  All of the
-    PNG_TRANSFORM_* values are always defined in png.h and, because they
-    are used for both read and write in some cases, it is not reliable
-    to #if out ones that are totally unsupported. This change adds error
-    detection in png_read_image() and png_write_image() to do a
-    png_app_error() if the app requests something that cannot be done
-    and it adds corresponding code to pngimage.c to handle such options
-    by not attempting to test them.
-
-Version 1.6.10beta02 [February 23, 2014]
-  Moved redefines of png_error(), png_warning(), png_chunk_error(),
-    and png_chunk_warning() from pngpriv.h to png.h to make them visible
-    to libpng-calling applications.
-  Moved OS dependent code from arm/arm_init.c, to allow the included
-    implementation of the ARM NEON discovery function to be set at
-    build-time and provide sample implementations from the current code in the
-    contrib/arm-neon subdirectory. The __linux__ code has also been changed to
-    compile and link on Android by using /proc/cpuinfo, and the old linux code
-    is in contrib/arm-neon/linux-auxv.c.  The new code avoids POSIX and Linux
-    dependencies apart from opening /proc/cpuinfo and is C90 compliant.
-  Check for info_ptr == NULL early in png_read_end() so we don't need to
-    run all the png_handle_*() and depend on them to return if info_ptr == NULL.
-    This improves the performance of png_read_end(png_ptr, NULL) and makes
-    it more robust against future programming errors.
-  Check for __has_extension before using it in pngconf.h, to
-    support older Clang versions (Jeremy Sequoia).
-  Treat CRC error handling with png_set_crc_action(), instead of with
-    png_set_benign_errors(), which has been the case since libpng-1.6.0beta18.
-  Use a user warning handler in contrib/gregbook/readpng2.c instead of default,
-    so warnings will be put on stderr even if libpng has CONSOLE_IO disabled.
-  Added png_ptr->process_mode = PNG_READ_IDAT_MODE in png_push_read_chunk
-    after recognizing the IDAT chunk, which avoids an infinite loop while
-    reading a datastream whose first IDAT chunk is of zero-length.
-    This fixes CERT VU#684412 and CVE-2014-0333.
-  Don't recognize known sRGB profiles as sRGB if they have been hacked,
-    but don't reject them and don't issue a copyright violation warning.
-
-Version 1.6.10beta03 [February 25, 2014]
-  Moved some documentation from png.h to libpng.3 and libpng-manual.txt
-  Minor editing of contrib/arm-neon/README and contrib/examples/*.c
-
-Version 1.6.10rc01 [February 27, 2014]
-  Fixed typos in the manual and in scripts/pnglibconf.dfa (CFLAGS -> CPPFLAGS
-    and PNG_USR_CONFIG -> PNG_USER_CONFIG).
-
-Version 1.6.10rc02 [February 28, 2014]
-  Removed unreachable return statement after png_chunk_error()
-    in pngrutil.c
-
-Version 1.6.10rc03 [March 4, 2014]
-  Un-deprecated png_data_freer().
-
-Version 1.6.10 [March 6, 2014]
-  No changes.
-
-Version 1.6.11beta01 [March 17, 2014]
-  Use "if (value != 0)" instead of "if (value)" consistently.
-  Changed ZlibSrcDir from 1.2.5 to 1.2.8 in projects/vstudio.
-  Moved configuration information from the manual to the INSTALL file.
-
-Version 1.6.11beta02 [April 6, 2014]
-  Removed #if/#else/#endif from inside two pow() calls in pngvalid.c because
-    they were handled improperly by Portland Group's PGI-14.1 - PGI-14.3
-    when using its "__builtin_pow()" function.
-  Silence 'unused parameter' build warnings (Cosmin Truta).
-  $(CP) is now used alongside $(RM_F).  Also, use 'copy' instead of 'cp'
-    where applicable, and applied other minor makefile changes (Cosmin).
-  Don't warn about invalid dimensions exceeding user limits (Cosmin).
-  Allow an easy replacement of the default pre-built configuration
-    header with a custom header, via the make PNGLIBCONF_H_PREBUILT
-    macro (Cosmin).
-
-Version 1.6.11beta03 [April 6, 2014]
-  Fixed a typo in pngrutil.c, introduced in libpng-1.5.6, that interferes
-    with "blocky" expansion of sub-8-bit interlaced PNG files (Eric Huss).
-  Optionally use  __builtin_bswap16() in png_do_swap().
-
-Version 1.6.11beta04 [April 19, 2014]
-  Made progressive reading of interlaced images consistent with the
-    behavior of the sequential reader and consistent with the manual, by
-    moving some code out of the PNG_READ_INTERLACING_SUPPORTED blocks. The
-    row_callback now receives the proper pass number and unexpanded rows, when
-    png_combine_row() isn't built or used, and png_set_interlace_handling()
-    is not called.
-  Allow PNG_sRGB_PROFILE_CHECKING = (-1) to mean no sRGB profile checking.
-
-Version 1.6.11beta05 [April 26, 2014]
-  Do not reject ICC V2 profiles that lack padding (Kai-Uwe Behrmann).
-  Relocated closing bracket of the sRGB profile test loop to avoid getting
-    "Not recognizing known sRGB profile that has been edited" warning for
-    ICC V2 profiles that lack the MD5 signature in the profile header.
-
-Version 1.6.11beta06 [May 19, 2014]
-  Added PNG_SKIP_sRGB_CHECK_PROFILE choice for png_set_option().
-
-Version 1.6.11rc01 [May 27, 2014]
-  No changes.
-
-Version 1.6.11rc02 [June 3, 2014]
-  Test ZLIB_VERNUM instead of PNG_ZLIB_VERNUM in contrib/tools/pngfix.c
-
-Version 1.6.11 [June 5, 2014]
-  No changes.
-
-Version 1.6.12rc01 [June 6, 2014]
-  Relocated new code from 1.6.11beta06 in png.c to a point after the
-    declarations (Max Stepin).
-
-Version 1.6.12rc02 [June 7, 2014]
-  Changed file permissions of contrib/tools/intgamma.sh,
-    test-driver, and compile from 0644 to 0755 (Cosmin).
-
-Version 1.6.12rc03 [June 8, 2014]
-  Ensure "__has_attribute()" macro exists before trying to use it with
-    old clang compilers (MacPorts Ticket #43939).
-
-Version 1.6.12 [June 12, 2014]
-  No changes.
-
-Version 1.6.13beta01 [July 4, 2014]
-  Quieted -Wsign-compare and -Wclobber compiler warnings in
-    contrib/pngminus/*.c
-  Added "(void) png_ptr;" where needed in contrib/gregbook to quiet
-    compiler complaints about unused pointers.
-  Split a long output string in contrib/gregbook/rpng2-x.c.
-  Added "PNG_SET_OPTION" requirement for sRGB chunk support to pnglibconf.dfa,
-    Needed for write-only support (John Bowler).
-  Changed "if defined(__ARM_NEON__)" to
-    "if (defined(__ARM_NEON__) || defined(__ARM_NEON))" (James Wu).
-  Fixed clang no-warning builds: png_digit was defined but never used.
-
-Version 1.6.13beta02 [July 21, 2014]
-  Fixed an incorrect separator ("/" should be "\") in scripts/makefile.vcwin32
-    (bug report from Wolfgang S. Kechel).  Bug was introduced in libpng-1.6.11.
-    Also fixed makefile.bc32, makefile.bor, makefile.msc, makefile.intel, and
-    makefile.tc3 similarly.
-
-Version 1.6.13beta03 [August 3, 2014]
-  Removed scripts/makefile.elf. It has not worked since libpng-1.5.0beta14
-    due to elimination of the PNG_FUNCTION_EXPORT and PNG_DATA_EXPORT
-    definitions from pngconf.h.
-  Ensure that CMakeLists.txt makes the target "lib" directory before making
-    symbolic link into it (SourceForge bug report #226 by Rolf Timmermans).
-
-Version 1.6.13beta04 [August 8, 2014]
-  Added opinion that the ECCN (Export Control Classification Number) for
-    libpng is EAR99 to the README file.
-  Eliminated use of "$<" in makefile explicit rules, when copying
-    $PNGLIBCONF_H_PREBUILT.  This does not work on some versions of make;
-    bug introduced in libpng version 1.6.11.
-
-Version 1.6.13rc01 [August 14, 2014]
-  Made "ccopts" agree with "CFLAGS" in scripts/makefile.hp* and makefile.*sunu
-
-Version 1.6.13 [August 21, 2014]
-  No changes.
-
-Version 1.6.14beta01 [September 14, 2014]
-  Guard usage of png_ptr->options with #ifdef PNG_SET_OPTION_SUPPORTED.
-  Do not build contrib/tools/pngfix.c when PNG_SETJMP_NOT_SUPPORTED,
-    to allow "make" to complete without setjmp support (bug report by
-    Claudio Fontana)
-  Add "#include <setjmp.h>" to contrib/tools/pngfix.c (John Bowler)
-
-Version 1.6.14beta02 [September 18, 2014]
-  Use nanosleep() instead of usleep() in contrib/gregbook/rpng2-x.c
-    because usleep() is deprecated.
-  Define usleep() in contrib/gregbook/rpng2-x.c if not already defined
-    in unistd.h and nanosleep() is not available; fixes error introduced
-    in libpng-1.6.13.
-  Disable floating point exception handling in pngvalid.c when
-    PNG_FLOATING_ARITHMETIC is not supported (bug report by "zootus
-    at users.sourceforge.net").
-
-Version 1.6.14beta03 [September 19, 2014]
-  Define FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in pngvalid.c if not
-    already defined.  Revert floating point exception handling in pngvalid.c
-    to version 1.6.14beta01 behavior.
-
-Version 1.6.14beta04 [September 27, 2014]
-  Fixed incorrect handling of the iTXt compression flag in pngrutil.c
-    (bug report by Shunsaku Hirata).  Bug was introduced in libpng-1.6.0.
-
-Version 1.6.14beta05 [October 1, 2014]
-  Added "option READ_iCCP enables READ_COMPRESSED_TEXT" to pnglibconf.dfa
-
-Version 1.6.14beta06 [October 5, 2014]
-  Removed unused "text_len" parameter from private function png_write_zTXt().
-  Conditionally compile some code in png_deflate_claim(), when
-    PNG_WARNINGS_SUPPORTED and PNG_ERROR_TEXT_SUPPORTED are disabled.
-  Replaced repeated code in pngpread.c with PNG_PUSH_SAVE_BUFFER_IF_FULL.
-  Added "chunk iTXt enables TEXT" and "chunk zTXt enables TEXT"
-    to pnglibconf.dfa.
-  Removed "option READ_COMPRESSED_TEXT enables READ_TEXT" from pnglibconf.dfa,
-    to make it possible to configure a libpng that supports iCCP but not TEXT.
-
-Version 1.6.14beta07 [October 7, 2014]
-  Removed "option WRITE_COMPRESSED_TEXT enables WRITE_TEXT" from pnglibconf.dfa
-  Only mark text chunks as written after successfully writing them.
-
-Version 1.6.14rc01 [October 15, 2014]
-  Fixed some typos in comments.
-
-Version 1.6.14rc02 [October 17, 2014]
-  Changed png_convert_to_rfc_1123() to png_convert_to_rfc_1123_buffer()
-    in the manual, to reflect the change made in libpng-1.6.0.
-  Updated README file to explain that direct access to the png_struct
-    and info_struct members has not been permitted since libpng-1.5.0.
-
-Version 1.6.14 [October 23, 2014]
-  No changes.
-
-Version 1.6.15beta01 [October 29, 2014]
-  Changed "if (!x)" to "if (x == 0)" and "if (x)" to "if (x != 0)"
-  Simplified png_free_data().
-  Added missing "ptr = NULL" after some instances of png_free().
-
-Version 1.6.15beta02 [November 1, 2014]
-  Changed remaining "if (!x)" to "if (x == 0)" and "if (x)" to "if (x != 0)"
-
-Version 1.6.15beta03 [November 3, 2014]
-  Added PNG_USE_ARM_NEON configuration flag (Marcin Juszkiewicz).
-
-Version 1.6.15beta04 [November 4, 2014]
-  Removed new PNG_USE_ARM_NEON configuration flag and made a one-line
-    revision to configure.ac to support ARM on aarch64 instead (John Bowler).
-
-Version 1.6.15beta05 [November 5, 2014]
-  Use png_get_libpng_ver(NULL) instead of PNG_LIBPNG_VER_STRING in
-    example.c, pngtest.c, and applications in the contrib directory.
-  Fixed an out-of-range read in png_user_version_check() (Bug report from
-    Qixue Xiao, CVE-2015-8540).
-  Simplified and future-proofed png_user_version_check().
-  Fixed GCC unsigned int->float warnings. Various versions of GCC
-    seem to generate warnings when an unsigned value is implicitly
-    converted to double. This is probably a GCC bug but this change
-    avoids the issue by explicitly converting to (int) where safe.
-  Free all allocated memory in pngimage. The file buffer cache was left
-    allocated at the end of the program, harmless but it causes memory
-    leak reports from clang.
-  Fixed array size calculations to avoid warnings. At various points
-    in the code the number of elements in an array is calculated using
-    sizeof.  This generates a compile time constant of type (size_t) which
-    is then typically assigned to an (unsigned int) or (int). Some versions
-    of GCC on 64-bit systems warn about the apparent narrowing, even though
-    the same compiler does apparently generate the correct, in-range,
-    numeric constant.  This adds appropriate, safe, casts to make the
-    warnings go away.
-
-Version 1.6.15beta06 [November 6, 2014]
-  Reverted use png_get_libpng_ver(NULL) instead of PNG_LIBPNG_VER_STRING
-    in the manual, example.c, pngtest.c, and applications in the contrib
-    directory.  It was incorrect advice.
-
-Version 1.6.15beta07 [November 7, 2014]
-  Removed #ifdef PNG_16BIT_SUPPORTED/#endif around png_product2(); it is
-    needed by png_reciprocal2().
-  Added #ifdef PNG_16BIT_SUPPORTED/#endif around png_log16bit() and
-    png_do_swap().
-  Changed all "#endif /* PNG_FEATURE_SUPPORTED */" to "#endif /* FEATURE */"
-
-Version 1.6.15beta08 [November 8, 2014]
-  More housecleaning in *.h
-
-Version 1.6.15rc01 [November 13, 2014]
-
-Version 1.6.15rc02 [November 14, 2014]
-  The macros passed in the command line to Borland make were ignored if
-    similarly-named macros were already defined in makefiles. This behavior
-    is different from POSIX make and other make programs.  Surround the
-    macro definitions with ifndef guards (Cosmin).
-
-Version 1.6.15rc03 [November 16, 2014]
-  Added "-D_CRT_SECURE_NO_WARNINGS" to CFLAGS in scripts/makefile.vcwin32.
-  Removed the obsolete $ARCH variable from scripts/makefile.darwin.
-
-Version 1.6.15 [November 20, 2014]
-  No changes.
-
-Version 1.6.16beta01 [December 14, 2014]
-  Added ".align 2" to arm/filter_neon.S to support old GAS assemblers that
-    don't do alignment correctly.
-  Revised Makefile.am and scripts/symbols.dfn to work with MinGW/MSYS
-    (Bob Friesenhahn).
-
-Version 1.6.16beta02 [December 15, 2014]
-  Revised Makefile.am and scripts/*.dfn again to work with MinGW/MSYS;
-    renamed scripts/*.dfn to scripts/*.c (John Bowler).
-
-Version 1.6.16beta03 [December 21, 2014]
-  Quiet a "comparison always true" warning in pngstest.c (John Bowler).
-
-Version 1.6.16rc01 [December 21, 2014]
-  Restored a test on width that was removed from png.c at libpng-1.6.9
-    (Bug report by Alex Eubanks, CVE-2015-0973).
-
-Version 1.6.16rc02 [December 21, 2014]
-  Undid the update to pngrutil.c in 1.6.16rc01.
-
-Version 1.6.16rc03 [December 21, 2014]
-  Fixed an overflow in png_combine_row() with very wide interlaced images
-    (Bug report and fix by John Bowler, CVE-2014-9495).
-
-Version 1.6.16 [December 22, 2014]
-  No changes.
-
-Version 1.6.17beta01 [January 29, 2015]
-  Removed duplicate PNG_SAFE_LIMITS_SUPPORTED handling from pngconf.h
-  Corrected the width limit calculation in png_check_IHDR().
-  Removed user limits from pngfix. Also pass NULL pointers to
-    png_read_row to skip the unnecessary row de-interlace stuff.
-  Added testing of png_set_packing() to pngvalid.c
-  Regenerated configure scripts in the *.tar distributions with libtool-2.4.4
-  Implement previously untested cases of libpng transforms in pngvalid.c
-  Fixed byte order in png_do_read_filler() with 16-bit input. Previously
-    the high and low bytes of the filler, from png_set_filler() or from
-    png_set_add_alpha(), were read in the wrong order.
-  Made the check for out-of-range values in png_set_tRNS() detect
-    values that are exactly 2^bit_depth, and work on 16-bit platforms.
-  Merged some parts of libpng-1.6.17beta01 and libpng-1.7.0beta47.
-  Added #ifndef __COVERITY__ where needed in png.c, pngrutil.c and
-    pngset.c to avoid warnings about dead code.
-  Added "& 0xff" to many instances of expressions that are typecast
-    to (png_byte), to avoid Coverity warnings.
-
-Version 1.6.17beta02 [February 7, 2015]
-  Work around one more Coverity-scan dead-code warning.
-  Do not build png_product2() when it is unused.
-
-Version 1.6.17beta03 [February 17, 2015]
-  Display user limits in the output from pngtest.
-  Eliminated the PNG_SAFE_LIMITS macro and restored the 1-million-column
-    and 1-million-row default limits in pnglibconf.dfa, that can be reset
-    by the user at build time or run time.  This provides a more robust
-    defense against DOS and as-yet undiscovered overflows.
-
-Version 1.6.17beta04 [February 21, 2015]
-  Added PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED macro, on by default.
-  Allow user to call png_get_IHDR() with NULL arguments (Reuben Hawkins).
-  Rebuilt configure scripts with automake-1.15 and libtool-2.4.6
-
-Version 1.6.17beta05 [February 25, 2015]
-  Restored compiling of png_reciprocal2 with PNG_NO_16BIT.
-
-Version 1.6.17beta06 [February 27, 2015]
-  Moved png_set_filter() prototype into a PNG_WRITE_SUPPORTED block
-    of png.h.
-  Avoid runtime checks when converting integer to png_byte with
-    Visual Studio (Sergey Kosarevsky)
-
-Version 1.6.17rc01 [March 4, 2015]
-  No changes.
-
-Version 1.6.17rc02 [March 9, 2015]
-  Removed some comments that the configure script did not handle
-    properly from scripts/pnglibconf.dfa and pnglibconf.h.prebuilt.
-  Free the unknown_chunks structure even when it contains no data.
-
-Version 1.6.17rc03 [March 12, 2015]
-  Updated CMakeLists.txt to add OSX framework, change YES/NO to ON/OFF
-    for consistency, and remove some useless tests (Alexey Petruchik).
-
-Version 1.6.17rc04 [March 16, 2015]
-  Remove pnglibconf.h, pnglibconf.c, and pnglibconf.out instead of
-    pnglibconf.* in "make clean" (Cosmin).
-  Fix bug in calculation of maxbits, in png_write_sBIT, introduced
-    in libpng-1.6.17beta01 (John Bowler).
-
-Version 1.6.17rc05 [March 21, 2015]
-  Define PNG_FILTER_* and PNG_FILTER_VALUE_* in png.h even when WRITE
-    is not supported (John Bowler).  This fixes an error introduced in
-    libpng-1.6.17beta06.
-  Reverted "& 0xff" additions of version 1.6.17beta01. Libpng passes
-    the Coverity scan without them.
-
-Version 1.6.17rc06 [March 23, 2015]
-  Remove pnglibconf.dfn and pnglibconf.pre with "make clean".
-  Reformatted some "&0xff" instances to "& 0xff".
-  Fixed simplified 8-bit-linear to sRGB alpha. The calculated alpha
-    value was wrong.  It's not clear if this affected the final stored
-    value; in the obvious code path the upper and lower 8-bits of the
-    alpha value were identical and the alpha was truncated to 8-bits
-    rather than dividing by 257 (John Bowler).
-
-Version 1.6.17 [March 26, 2015]
-  No changes.
-
-Version 1.6.18beta01 [April 1, 2015]
-  Removed PNG_SET_CHUNK_[CACHE|MALLOC]_LIMIT_SUPPORTED macros.  They
-    have been combined with PNG_SET_USER_LIMITS_SUPPORTED (resolves
-    bug report by Andrew Church).
-  Fixed rgb_to_gray checks and added tRNS checks to pngvalid.c.  This
-    fixes some arithmetic errors that caused some tests to fail on
-    some 32-bit platforms (Bug reports by Peter Breitenlohner [i686]
-    and Petr Gajdos [i586]).
-
-Version 1.6.18beta02 [April 26, 2015]
-  Suppressed some warnings from the Borland C++ 5.5.1/5.82 compiler
-    (Bug report by Viktor Szakats).
-
-Version 1.6.18beta03 [May 6, 2015]
-  Replaced "unexpected" with an integer (0xabadca11) in pngset.c
-    where a long was expected, to avoid a compiler warning when PNG_DEBUG > 1.
-  Added contrib/examples/simpleover.c, to demonstrate how to handle
-    alpha compositing of multiple images, using the "simplified API"
-    and an example PNG generation tool, contrib/examples/genpng.c
-    (John Bowler).
-
-Version 1.6.18beta04 [May 20, 2015]
-  PNG_RELEASE_BUILD replaces tests where the code depended on the build base
-    type and can be defined on the command line, allowing testing in beta
-    builds (John Bowler).
-  Avoid Coverity issue 80858 (REVERSE NULL) in pngtest.c PNG_DEBUG builds.
-  Avoid a harmless potential integer overflow in png_XYZ_from_xy() (Bug
-    report from Christopher Ferris).
-
-Version 1.6.18beta05 [May 31, 2015]
-  Backport filter selection code from libpng-1.7.0beta51, to combine
-    sub_row, up_row, avg_row, and paeth_row into try_row and tst_row.
-  Changed png_voidcast(), etc., to voidcast(), etc., in contrib/tools/pngfix.c
-    to avoid confusion with the libpng private macros.
-  Fixed old cut&paste bug in the weighted filter selection code in
-    pngwutil.c, introduced in libpng-0.95, March 1997.
-
-Version 1.6.18beta06 [June 1, 2015]
-  Removed WRITE_WEIGHTED_FILTERED code, to save a few kbytes of the
-    compiled library size. It never worked properly and as far as we can
-    tell, no one uses it. The png_set_filter_heuristics() and
-    png_set_filter_heuristics_fixed() APIs are retained but deprecated
-    and do nothing.
-
-Version 1.6.18beta07 [June 6, 2015]
-  Removed non-working progressive reader 'skip' function. This
-    function has apparently never been used. It was implemented
-    to support back-door modification of png_struct in libpng-1.4.x
-    but (because it does nothing and cannot do anything) was apparently
-    never tested (John Bowler).
-  Fixed cexcept.h in which GCC 5 now reports that one of the auto
-    variables in the Try macro needs to be volatile to prevent value
-    being lost over the setjmp (John Bowler).
-  Fixed NO_WRITE_FILTER and -Wconversion build breaks (John Bowler).
-  Fix g++ build breaks (John Bowler).
-  Quieted some Coverity issues in pngfix.c, png-fix-itxt.c, pngvalid.c,
-    pngstest.c, and pngimage.c. Most seem harmless, but png-fix-itxt
-    would only work with iTXt chunks with length 255 or less.
-  Added #ifdef's to contrib/examples programs so people don't try
-    to compile them without the minimum required support enabled
-    (suggested by Flavio Medeiros).
-
-Version 1.6.18beta08 [June 30, 2015]
-  Eliminated the final two Coverity defects (insecure temporary file
-    handling in contrib/libtests/pngstest.c; possible overflow of
-    unsigned char in contrib/tools/png-fix-itxt.c). To use the "secure"
-    file handling, define PNG_USE_MKSTEMP, otherwise "tmpfile()" will
-    be used.
-  Removed some unused WEIGHTED_FILTER macros from png.h and pngstruct.h
-
-Version 1.6.18beta09 [July 5, 2015]
-  Removed some useless typecasts from contrib/tools/png-fix-itxt.c
-  Fixed a new signed-unsigned comparison in pngrtran.c (Max Stepin).
-  Replaced arbitrary use of 'extern' with #define PNG_LINKAGE_*.  To
-    preserve API compatibility, the new defines all default to "extern"
-    (requested by Jan Nijtmans).
-
-Version 1.6.18rc01 [July 9, 2015]
-  Belatedly added Mans Rullgard and James Yu to the list of Contributing
-    Authors.
-
-Version 1.6.18rc02 [July 12, 2015]
-  Restored unused FILTER_HEURISTIC macros removed at libpng-1.6.18beta08
-    to png.h to avoid compatibility warnings.
-
-Version 1.6.18rc03 [July 15, 2015]
-  Minor changes to the man page
-
-Version 1.6.18 [July 23, 2015]
-  No changes.
-
-Version 1.6.19beta01 [July 30, 2015]
-  Updated obsolete information about the simplified API macros in the
-    manual pages (Bug report by Arc Riley).
-  Avoid potentially dereferencing NULL info_ptr in png_info_init_3().
-  Rearranged png.h to put the major sections in the same order as
-    in libpng17.
-  Eliminated unused PNG_COST_SHIFT, PNG_WEIGHT_SHIFT, PNG_COST_FACTOR, and
-    PNG_WEIGHT_FACTOR macros.
-  Suppressed some warnings from the Borland C++ 5.5.1/5.82 compiler
-    (Bug report by Viktor Szakats).  Several warnings remain and are
-    unavoidable, where we test for overflow.
-  Fixed potential leak of png_pixels in contrib/pngminus/pnm2png.c
-  Fixed uninitialized variable in contrib/gregbook/rpng2-x.c
-
-Version 1.6.19beta02 [August 19, 2015]
-  Moved config.h.in~ from the "libpng_autotools_files" list to the
-    "libpng_autotools_extra" list in autogen.sh because it was causing a
-    false positive for missing files (bug report by Robert C. Seacord).
-  Removed unreachable "break" statements in png.c, pngread.c, and pngrtran.c
-    to suppress clang warnings (Bug report by Viktor Szakats).
-  Fixed some bad links in the man page.
-  Changed "n bit" to "n-bit" in comments.
-  Added signed/unsigned 16-bit safety net. This removes the dubious
-    0x8000 flag definitions on 16-bit systems. They aren't supported
-    yet the defs *probably* work, however it seems much safer to do this
-    and be advised if anyone, contrary to advice, is building libpng 1.6
-    on a 16-bit system. It also adds back various switch default clauses
-    for GCC; GCC errors out if they are not present (with an appropriately
-    high level of warnings).
-  Safely convert num_bytes to a png_byte in png_set_sig_bytes() (Robert
-    Seacord).
-  Fixed the recently reported 1's complement security issue by replacing
-    the value that is illegal in the PNG spec, in both signed and unsigned
-    values, with 0. Illegal unsigned values (anything greater than or equal
-    to  0x80000000) can still pass through, but since these are not illegal
-    in ANSI-C (unlike 0x80000000 in the signed case) the checking that
-    occurs later can catch them (John Bowler).
-
-Version 1.6.19beta03 [September 26, 2015]
-  Fixed png_save_int_32 when int is not 2's complement (John Bowler).
-  Updated libpng16 with all the recent test changes from libpng17,
-    including changes to pngvalid.c to ensure that the original,
-    distributed, version of contrib/visupng/cexcept.h can be used
-    (John Bowler).
-  pngvalid contains the correction to the use of SAVE/STORE_
-    UNKNOWN_CHUNKS; a bug revealed by changes in libpng 1.7. More
-    tests contain the --strict option to detect warnings and the
-    pngvalid-standard test has been corrected so that it does not
-    turn on progressive-read. There is a separate test which does
-    that. (John Bowler)
-  Also made some signed/unsigned fixes.
-  Make pngstest error limits version specific. Splitting the machine
-    generated error structs out to a file allows the values to be updated
-    without changing pngstest.c itself. Since libpng 1.6 and 1.7 have
-    slightly different error limits this simplifies maintenance. The
-    makepngs.sh script has also been updated to more accurately reflect
-    current problems in libpng 1.7 (John Bowler).
-  Incorporated new test PNG files into make check.  tests/pngstest-*
-    are changed so that the new test files are divided into 8 groups by
-    gamma and alpha channel.  These tests have considerably better code
-    and pixel-value coverage than contrib/pngsuite; however,coverage is
-    still incomplete (John Bowler).
-  Removed the '--strict' in 1.6 because of the double-gamma-correction
-    warning, updated pngstest-errors.h for the errors detected with the
-    new contrib/testspngs PNG test files (John Bowler).
-
-Version 1.6.19beta04 [October 15, 2015]
-  Worked around rgb-to-gray issues in libpng 1.6.  The previous
-    attempts to ignore the errors in the code aren't quite enough to
-    deal with the 'channel selection' encoding added to libpng 1.7; abort.
-    pngvalid.c is changed to drop this encoding in prior versions.
-  Fixed 'pow' macros in pngvalid.c. It is legal for 'pow' to be a
-    macro, therefore the argument list cannot contain preprocessing
-    directives.  Make sure pow is a function where this happens. This is
-    a minimal safe fix, the issue only arises in non-performance-critical
-    code (bug report by Curtis Leach, fix by John Bowler).
-  Added sPLT support to pngtest.c
-
-Version 1.6.19rc01 [October 23, 2015]
-  No changes.
-
-Version 1.6.19rc02 [October 31, 2015]
-  Prevent setting or writing over-length PLTE chunk (Cosmin Truta).
-  Silently truncate over-length PLTE chunk while reading.
-  Libpng incorrectly calculated the output rowbytes when the application
-    decreased either the number of channels or the bit depth (or both) in
-    a user transform.  This was safe; libpng overallocated buffer space
-   (potentially by quite a lot; up to 4 times the amount required) but,
-   from 1.5.4 on, resulted in a png_error (John Bowler).
-
-Version 1.6.19rc03 [November 3, 2015]
-  Fixed some inconsequential cut-and-paste typos in png_set_cHRM_XYZ_fixed().
-  Clarified COPYRIGHT information to state explicitly that versions
-    are derived from previous versions.
-  Removed much of the long list of previous versions from png.h and
-    libpng.3.
-
-Version 1.6.19rc04 [November 5, 2015]
-  Fixed new bug with CRC error after reading an over-length palette
-    (bug report by Cosmin Truta) (CVE-2015-8126).
-
-Version 1.6.19 [November 12, 2015]
-  Cleaned up coding style in png_handle_PLTE().
-
-Version 1.6.20beta01 [November 20, 2015]
-  Avoid potential pointer overflow/underflow in png_handle_sPLT() and
-    png_handle_pCAL() (Bug report by John Regehr).
-
-Version 1.6.20beta02 [November 23, 2015]
-  Fixed incorrect implementation of png_set_PLTE() that uses png_ptr
-    not info_ptr, that left png_set_PLTE() open to the CVE-2015-8126
-    vulnerability.  Fixes CVE-2015-8472.
-
-Version 1.6.20beta03 [November 24, 2015]
-  Backported tests from libpng-1.7.0beta69.
-
-Version 1.6.20rc01 [November 26, 2015]
-  Fixed an error in handling of bad zlib CMINFO field in pngfix, found by
-    American Fuzzy Lop, reported by Brian Carpenter.  inflate() doesn't
-    immediately fault a bad CMINFO field; instead a 'too far back' error
-    happens later (at least some times).  pngfix failed to limit CMINFO to
-    the allowed values but then assumed that window_bits was in range,
-    triggering an assert. The bug is mostly harmless; the PNG file cannot
-    be fixed.
-
-Version 1.6.20rc02 [November 29, 2015]
-  In libpng 1.6 zlib initialization was changed to use the window size
-    in the zlib stream, not a fixed value. This causes some invalid images,
-    where CINFO is too large, to display 'correctly' if the rest of the
-    data is valid.  This provides a workaround for zlib versions where the
-    error arises (ones that support the API change to use the window size
-    in the stream).
-
-Version 1.6.20 [December 3, 2015]
-  No changes.
-
-Version 1.6.21beta01 [December 11, 2015]
-  Fixed syntax "$(command)" in tests/pngstest that some shells other than
-    bash could not parse (Bug report by Nelson Beebe). Use `command` instead.
-
-Version 1.6.21beta02 [December 14, 2015]
-  Moved png_check_keyword() from pngwutil.c to pngset.c
-  Removed LE/BE dependencies in pngvalid, to 'fix' the current problem
-    in the BigEndian tests by not testing it, making the BE code the same
-    as the LE version.
-  Fixes to pngvalid for various reduced build configurations (eliminate unused
-    statics) and a fix for the case in rgb_to_gray when the digitize option
-    reduces graylo to 0, producing a large error.
-
-Version 1.6.21beta03 [December 18, 2015]
-  Widened the 'limit' check on the internally calculated error limits in
-    the 'DIGITIZE' case (the code used prior to 1.7 for rgb_to_gray error
-    checks) and changed the check to only operate in non-release builds
-    (base build type not RC or RELEASE.)
-  Fixed undefined behavior in pngvalid.c, undefined because
-    (png_byte) << shift is undefined if it changes the signed bit
-    (because png_byte is promoted to int). The libpng exported functions
-    png_get_uint_32 and png_get_uint_16 handle this. (Bug reported by
-    David Drysdale as a result of reports from UBSAN in clang 3.8).
-  This changes pngvalid to use BE random numbers; this used to produce
-    errors but these should not be fixed as a result of the previous changes.
-
-Version 1.6.21rc01 [January 4, 2016]
-  In projects/vstudio, combined readme.txt and WARNING into README.txt
-
-Version 1.6.21rc02 [January 7, 2016]
-  Relocated assert() in contrib/tools/pngfix.c, bug found by American
-    Fuzzy Lop, reported by Brian Carpenter.
-  Marked 'limit' UNUSED in transform_range_check().  This only affects
-    release builds.
-
-Version 1.6.21 [January 15, 2016]
-  Worked around a false-positive Coverity issue in pngvalid.c.
-
-Version 1.6.22beta01 [January 23, 2016]
-  Changed PNG_USE_MKSTEMP to __COVERITY__ to select alternate
-    "tmpfile()" implementation in contrib/libtests/pngstest.c
-  Fixed NO_STDIO build of pngunknown.c to skip calling png_init_io()
-    if there is no stdio.h support.
-  Added a png_image_write_to_memory() API and a number of assist macros
-    to allow an application that uses the simplified API write to bypass
-    stdio and write directly to memory.
-  Added some warnings (png.h) and some check code to detect *possible*
-    overflow in the ROW_STRIDE and simplified image SIZE macros.  This
-    disallows image width/height/format that *might* overflow.  This is
-    a quiet API change that limits in-memory image size (uncompressed) to
-    less than 4GByte and image row size (stride) to less than 2GByte.
-  Revised workaround for false-positive Coverity issue in pngvalid.c.
-
-Version 1.6.22beta02 [February 8, 2016]
-  Only use exit(77) in configure builds.
-  Corrected error in PNG_IMAGE_PNG_SIZE_MAX. This new macro underreported
-    the palette size because it failed to take into account that the memory
-    palette has to be expanded to full RGB when it is written to PNG.
-  Updated CMakeLists.txt, added supporting scripts/gen*.cmake.in
-    and test.cmake.in (Roger Leigh).
-  Relaxed limit checks on gamma values in pngrtran.c. As suggested in
-    the comments gamma values outside the range currently permitted
-    by png_set_alpha_mode are useful for HDR data encoding.  These values
-    are already permitted by png_set_gamma so it is reasonable caution to
-    extend the png_set_alpha_mode range as HDR imaging systems are starting
-    to emerge.
-
-Version 1.6.22beta03 [March 9, 2016]
-  Added a common-law trademark notice and export control information
-    to the LICENSE file, png.h, and the man page.
-  Restored "& 0xff" in png_save_uint_16() and png_save_uint_32() that
-    were accidentally removed from libpng-1.6.17.
-  Changed PNG_INFO_cHNK and PNG_FREE_cHNK from 0xnnnn to 0xnnnnU in png.h
-    (Robert C. Seacord).
-  Removed dubious "#if INT_MAX" test from png.h that was added to
-    libpng-1.6.19beta02 (John Bowler).
-  Add ${INCLUDES} in scripts/genout.cmake.in (Bug report by Nixon Kwok).
-  Updated LICENSE to say files in the contrib directory are not
-    necessarily under the libpng license, and that some makefiles have
-    other copyright owners.
-  Added INTEL-SSE2 support (Mike Klein and Matt Sarett, Google, Inc.).
-  Made contrib/libtests/timepng more robust.  The code no longer gives
-    up/fails on invalid PNG data, it just skips it (with error messages).
-    The code no longer fails on PNG files with data beyond IEND.  Options
-    exist to use png_read_png (reading the whole image, not by row) and, in
-    that case, to apply any of the supported transforms.  This makes for
-    more realistic testing; the decoded data actually gets used in a
-    meaningful fashion (John Bowler).
-  Fixed some misleading indentation (Krishnaraj Bhat).
-
-Version 1.6.22beta04 [April 5, 2016]
-  Force GCC compilation to C89 if needed (Dagobert Michelsen).
-  SSE filter speed improvements for bpp=3:
-    memcpy-free implementations of load3() / store3().
-    call load3() only when needed at the end of a scanline.
-
-Version 1.6.22beta05 [April 27, 2016]
-  Added PNG_FAST_FILTERS macro (defined as
-    PNG_FILTER_NONE|PNG_FILTER_SUB|PNG_FILTER_UP).
-  Various fixes for contrib/libtests/timepng.c
-  Moved INTEL-SSE code from pngpriv.h into contrib/intel/intel_sse.patch.
-  Fixed typo (missing underscore) in #define PNG_READ_16_TO_8_SUPPORTED
-    (Bug report by Y.Ohashik).
-
-Version 1.6.22beta06 [May 5, 2016]
-  Rebased contrib/intel_sse.patch.
-  Quieted two Coverity issues in contrib/libtests/timepng.c.
-  Fixed issues with scripts/genout.cmake.in (David Capello, Nixon Kwok):
-    Added support to use multiple directories in ZLIBINCDIR variable,
-    Fixed CMAKE_C_FLAGS with multiple values when genout is compiled on MSVC,
-    Fixed pnglibconf.c compilation on OS X including the sysroot path.
-
-Version 1.6.22rc01 [May 14, 2016]
-  No changes.
-
-Version 1.6.22rc02 [May 16, 2016]
-  Removed contrib/timepng from default build; it does not build on platforms
-    that don't supply clock_gettime().
-
-Version 1.6.22rc03 [May 17, 2016]
-  Restored contrib/timepng to default build but check for the presence
-    of clock_gettime() in configure.ac and Makefile.am.
-
-Version 1.6.22 [May 26, 2016]
-  No changes.
-
-Version 1.6.23beta01 [May 29, 2016]
-  Stop a potential memory leak in png_set_tRNS() (Bug report by Ted Ying).
-  Fixed the progressive reader to handle empty first IDAT chunk properly
-    (patch by Timothy Nikkel).  This bug was introduced in libpng-1.6.0 and
-    only affected the libpng16 branch.
-  Added tests in pngvalid.c to check zero-length IDAT chunks in various
-    positions.  Fixed the sequential reader to handle these more robustly
-    (John Bowler).
-
-Version 1.6.23rc01 [June 2, 2016]
-  Corrected progressive read input buffer in pngvalid.c. The previous version
-    the code invariably passed just one byte at a time to libpng.  The intent
-    was to pass a random number of bytes in the range 0..511.
-  Moved sse2 prototype from pngpriv.h to contrib/intel/intel_sse.patch.
-  Added missing ")" in pngerror.c (Matt Sarrett).
-
-Version 1.6.23rc02 [June 4, 2016]
-  Fixed undefined behavior in png_push_save_buffer(). Do not call
-    memcpy() with a null source, even if count is zero (Leon Scroggins III).
-
-Version 1.6.23 [June 9, 2016]
-  Fixed bad link to RFC2083 in png.5 (Nikola Forro).
-
-Version 1.6.24beta01 [June 11, 2016]
-  Avoid potential overflow of the PNG_IMAGE_SIZE macro.  This macro
-    is not used within libpng, but is used in some of the examples.
-
-Version 1.6.24beta02 [June 23, 2016]
-  Correct filter heuristic overflow handling. This was broken when the
-    write filter code was moved out-of-line; if there is a single filter and
-    the heuristic sum overflows the calculation of the filtered line is not
-    completed.  In versions prior to 1.6 the code was duplicated in-line
-    and the check not performed, so the filter operation completed; however,
-    in the multi-filter case where the sum is performed the 'none' filter would
-    be selected if all the sums overflowed, even if it wasn't in the filter
-    list.  The fix to the first problem is simply to provide PNG_SIZE_MAX as
-    the current lmins sum value; this means the sum can never exceed it and
-    overflows silently.  A reasonable compiler that does choose to inline
-    the code will simply eliminate the sum check.
-  The fix to the second problem is to use high precision arithmetic (this is
-    implemented in 1.7), however a simple safe fix here is to chose the lowest
-    numbered filter in the list from png_set_filter (this only works if the
-    first problem is also fixed) (John Bowler).
-  Use a more efficient absolute value calculation on SSE2 (Matthieu Darbois).
-  Fixed the case where PNG_IMAGE_BUFFER_SIZE can overflow in the application
-    as a result of the application using an increased 'row_stride'; previously
-    png_image_finish_read only checked for overflow on the base calculation of
-    components.  (I.e. it checked for overflow of a 32-bit number on the total
-    number of pixel components in the output format, not the possibly padded row
-    length and not the number of bytes, which for linear formats is twice the
-    number of components.)
-  MSVC does not like '-(unsigned)', so replaced it with 0U-(unsigned)
-  MSVC does not like (uInt) = -(unsigned) (i.e. as an initializer), unless
-    the conversion is explicitly invoked by a cast.
-  Put the SKIP definition in the correct place. It needs to come after the
-    png.h include (see all the other .c files in contrib/libtests) because it
-    depends on PNG_LIBPNG_VER.
-  Removed the three compile warning options from the individual project
-    files into the zlib.props globals.  It increases the warning level from 4
-    to All and adds a list of the warnings that need to be turned off.  This is
-    semi-documentary; the intent is to tell libpng users which warnings have
-    been examined and judged non-fixable at present.  The warning about
-    structure padding is fixable, but it would be a significant change (moving
-    structure members around).
-
-Version 1.6.24beta03 [July 4, 2016]
-  Optimized absolute value calculation in filter selection, similar to
-    code in the PAETH decoder in pngrutil.c. Build with PNG_USE_ABS to
-    use this.
-  Added pngcp to the build together with a pngcp.dfa configuration test.
-  Added high resolution timing to pngcp.
-  Added "Common linking failures" section to INSTALL.
-  Relocated misplaced #endif in png.c sRGB profile checking.
-  Fixed two Coverity issues in pngcp.c.
-
-Version 1.6.24beta04 [July 8, 2016]
-  Avoid filter-selection heuristic sum calculations in cases where only one
-    filter is a candidate for selection. This trades off code size (added
-    private png_setup_*_row_only() functions) for speed.
-
-Version 1.6.24beta05 [July 13, 2016]
-  Fixed some indentation to comply with our coding style.
-  Added contrib/tools/reindent.
-
-Version 1.6.24beta06 [July 18, 2016]
-  Fixed more indentation to comply with our coding style.
-  Eliminated unnecessary tests of boolean png_isaligned() vs 0.
-
-Version 1.6.24rc01 [July 25, 2016]
-  No changes.
-
-Version 1.6.24rc02 [August 1, 2016]
-  Conditionally compile SSE2 headers in contrib/intel/intel_sse.patch
-  Conditionally compile png_decompress_chunk().
-
-Version 1.6.24rc03 [August 2, 2016]
-  Conditionally compile ARM_NEON headers in pngpriv.h
-  Updated contrib/intel/intel_sse.patch
-
-Version 1.6.24[August 4, 2016]
-  No changes.
-
-Version 1.6.25beta01 [August 12, 2016]
-  Reject oversized iCCP profile immediately.
-  Cleaned up PNG_DEBUG compile of pngtest.c.
-  Conditionally compile png_inflate().
-
-Version 1.6.25beta02 [August 18, 2016]
-  Don't install pngcp; it conflicts with pngcp in the pngtools package.
-  Minor editing of INSTALL, (whitespace, added copyright line)
-
-Version 1.6.25rc01 [August 24, 2016]
-  No changes.
-
-Version 1.6.25rc02 [August 29, 2016]
-  Added MIPS support (Mandar Sahastrabuddhe <Mandar.Sahastrabuddhe@imgtec.com>).
-  Only the UP filter is currently implemented.
-
-Version 1.6.25rc03 [August 29, 2016]
-  Rebased contrib/intel/intel_sse.patch after the MIPS implementation.
-
-Version 1.6.25rc04 [August 30, 2016]
-  Added MIPS support for SUB, AVG, and PAETH filters (Mandar Sahastrabuddhe).
-
-Version 1.6.25rc05 [August 30, 2016]
-  Rebased contrib/intel/intel_sse.patch after the MIPS implementation update..
-
-Version 1.6.25 [September 1, 2016]
-  No changes.
-
-Version 1.6.26beta01 [September 26, 2016]
-  Fixed handling zero length IDAT in pngfix (bug report by Agostino Sarubbo,
-    bugfix by John Bowler).
-  Do not issue a png_error() on read in png_set_pCAL() because png_handle_pCAL
-    has allocated memory that libpng needs to free.
-  Conditionally compile png_set_benign_errors() in pngread.c and pngtest.c
-  Issue a png_benign_error instead of a png_error on ADLER32 mismatch
-    while decoding compressed data chunks.
-  Changed PNG_ZLIB_VERNUM to ZLIB_VERNUM in pngpriv.h, pngstruct.h, and
-    pngrutil.c.
-  If CRC handling of critical chunks has been set to PNG_CRC_QUIET_USE,
-    ignore the ADLER32 checksum in the IDAT chunk as well as the chunk CRCs.
-  Issue png_benign_error() on ADLER32 checksum mismatch instead of png_error().
-  Add tests/badcrc.png and tests/badadler.png to tests/pngtest.
-  Merged pngtest.c with libpng-1.7.0beta84/pngtest.c
-
-Version 1.6.26beta02 [October 1, 2016]
-  Updated the documentation about CRC and ADLER32 handling.
-  Quieted 117 warnings from clang-3.8 in pngtrans.c, pngread.c,
-     pngwrite.c, pngunknown.c, and pngvalid.c.
-  Quieted 58 (out of 144) -Wconversion compiler warnings by changing
-    flag definitions in pngpriv.h from 0xnnnn to 0xnnnnU and trivial changes
-    in png.c, pngread.c, and pngwutil.c.
-
-Version 1.6.26beta03 [October 2, 2016]
-  Removed contrib/libtests/*.orig and *.rej that slipped into the tarballs.
-  Quieted the 86 remaining -Wconversion compiler warnings by
-    revising the png_isaligned() macro and trivial changes in png.c,
-    pngerror.c, pngget.c, pngmem.c, pngset.c, pngrtran.c, pngrutil.c,
-    pngwtran.c, pngwrite.c, and pngwutil.c.
-
-Version 1.6.26beta04 [October 3, 2016]
-  Quieted (bogus?) clang warnings about "absolute value has no effect"
-    when PNG_USE_ABS is defined.
-  Fixed offsets in contrib/intel/intel_sse.patch
-
-Version 1.6.26beta05 [October 6, 2016]
-  Changed integer constant 4294967294 to unsigned 4294967294U in pngconf.h
-    to avoid a signed/unsigned compare in the preprocessor.
-
-Version 1.6.26beta06 [October 7, 2016]
-  Use zlib-1.2.8.1 inflateValidate() instead of inflateReset2() to
-    optionally avoid ADLER32 evaluation.
-
-Version 1.6.26rc01 [October 12, 2016]
-  No changes.
-
-Version 1.6.26 [October 20, 2016]
-  Cosmetic change, "ptr != 0" to "ptr != NULL" in png.c and pngrutil.c
-  Despammed email addresses (replaced "@" with " at ").
-
-Version 1.6.27beta01 [November 2, 2016]
-  Restrict the new ADLER32-skipping to IDAT chunks.  It broke iCCP chunk
-    handling: an erroneous iCCP chunk would throw a png_error and reject the
-    entire PNG image instead of rejecting just the iCCP chunk with a warning,
-    if built with zlib-1.2.8.1.
-
-Version 1.6.27rc01 [December 27, 2016]
-  Control ADLER32 checking with new PNG_IGNORE_ADLER32 option. Fixes
-    an endless loop when handling erroneous ADLER32 checksums; bug
-    introduced in libpng-1.6.26.
-  Removed the use of a macro containing the pre-processor 'defined'
-    operator.  It is unclear whether this is valid; a macro that
-    "generates" 'defined' is not permitted, but the use of the word
-    "generates" within the C90 standard seems to imply more than simple
-    substitution of an expression itself containing a well-formed defined
-    operation.
-  Added ARM support to CMakeLists.txt (Andreas Franek).
-
-Version 1.6.27 [December 29, 2016]
-  Fixed a potential null pointer dereference in png_set_text_2() (bug report
-    and patch by Patrick Keshishian, CVE-2016-10087).
-
-Version 1.6.28rc01 [January 3, 2017]
-  Fixed arm/aarch64 detection in CMakeLists.txt (Gianfranco Costamagna).
-  Added option to Cmake build allowing a custom location of zlib to be
-    specified in a scenario where libpng is being built as a subproject
-    alongside zlib by another project (Sam Serrels).
-  Changed png_ptr->options from a png_byte to png_uint_32, to accommodate
-    up to 16 options.
-
-Version 1.6.28rc02 [January 4, 2017]
-  Added "include(GNUInstallDirs)" to CMakeLists.txt (Gianfranco Costamagna).
-  Moved SSE2 optimization code into the main libpng source directory.
-    Configure libpng with "configure --enable-intel-sse" or compile
-    libpng with "-DPNG_INTEL_SSE" in CPPFLAGS to enable it.
-
-Version 1.6.28rc03 [January 4, 2017]
-  Backed out the SSE optimization and last CMakeLists.txt to allow time for QA.
-
-Version 1.6.28 [January 5, 2017]
-  No changes.
-
-Version 1.6.29beta01 [January 12, 2017]
-  Readded "include(GNUInstallDirs)" to CMakeLists.txt (Gianfranco Costamagna).
-  Moved SSE2 optimization code into the main libpng source directory.
-    Configure libpng with "configure --enable-intel-sse" or compile
-    libpng with "-DPNG_INTEL_SSE" in CPPFLAGS to enable it.
-  Simplified conditional compilation in pngvalid.c, for AIX (Michael Felt).
-
-Version 1.6.29beta02 [February 22, 2017]
-  Avoid conditional directives that break statements in pngrutil.c (Romero
-    Malaquias)
-  The contrib/examples/pngtopng.c recovery code was in the wrong "if"
-    branches; the comments were correct.
-  Added code for PowerPC VSX optimisation (Vadim Barkov).
-
-Version 1.6.29beta03 [March 1, 2017]
-  Avoid potential overflow of shift operations in png_do_expand() (Aaron Boxer).
-  Change test ZLIB_VERNUM >= 0x1281 to ZLIB_VERNUM >= 0x1290 in pngrutil.c
-    because Solaris 11 distributes zlib-1.2.8.f that is older than 1.2.8.1,
-    as suggested in zlib FAQ, item 24.
-  Suppress clang warnings about implicit sign changes in png.c
-
-Version 1.6.29 [March 16, 2017]
-  No changes.
-
-Version 1.6.30beta01 [April 1, 2017]
-  Added missing "$(CPPFLAGS)" to the compile line for c.pic.o in
-    makefile.linux and makefile.solaris-x86 (Cosmin).
-  Revised documentation of png_get_error_ptr() in the libpng manual.
-  Silence clang -Wcomma and const drop warnings (Viktor Szakats).
-  Update Sourceforge URLs in documentation (https instead of http).
-
-Version 1.6.30beta02 [April 22, 2017]
-  Document need to check for integer overflow when allocating a pixel
-    buffer for multiple rows in contrib/gregbook, contrib/pngminus,
-    example.c, and in the manual (suggested by Jaeseung Choi). This
-    is similar to the bug reported against pngquant in CVE-2016-5735.
-  Removed reference to the obsolete PNG_SAFE_LIMITS macro in the documentation.
-
-Version 1.6.30beta03 [May 22, 2017]
-  Check for integer overflow in contrib/visupng and contrib/tools/genpng.
-  Do not double evaluate CMAKE_SYSTEM_PROCESSOR in CMakeLists.txt.
-  Test CMAKE_HOST_WIN32 instead of WIN32 in CMakeLists.txt.
-  Fix some URL in documentation.
-
-Version 1.6.30beta04 [June 7, 2017]
-  Avoid writing an empty IDAT when the last IDAT exactly fills the
-    compression buffer (bug report by Brian Baird). This bug was
-    introduced in libpng-1.6.0.
-
-Version 1.6.30rc01 [June 14, 2017]
-  No changes.
-
-Version 1.6.30rc02 [June 25, 2017]
-  Update copyright year in pnglibconf.h, make ltmain.sh executable.
-  Add a reference to the libpng.download site in README.
-
-Version 1.6.30 [June 28, 2017]
-  No changes.
-
-Version 1.6.31beta01 [July 5, 2017]
-  Guard the definition of _POSIX_SOURCE in pngpriv.h (AIX already defines it;
-    bug report by Michael Felt).
-  Revised pngpriv.h to work around failure to compile arm/filter_neon.S
-    ("typedef" directive is unrecognized by the assembler). The problem
-    was introduced in libpng-1.6.30beta01.
-  Added "Requires: zlib" to libpng.pc.in (Pieter Neerincx).
-  Added special case for FreeBSD in arm/filter_neon.S (Maya Rashish).
-
-Version 1.6.31beta02 [July 8, 2017]
-  Added instructions for disabling hardware optimizations in INSTALL.
-  Added "--enable-hardware-optimizations" configuration flag to enable
-    or disable all hardware optimizations with one flag.
-
-Version 1.6.31beta03 [July 9, 2017]
-  Updated CMakeLists.txt to add INTEL_SSE and MIPS_MSA platforms.
-  Changed "int" to "png_size_t" in intel/filter_sse2.c to prevent
-    possible integer overflow (Bug report by John Bowler).
-  Quieted "declaration after statement" warnings in intel/filter_sse2.c.
-  Added scripts/makefile-linux-opt, which has hardware optimizations enabled.
-
-Version 1.6.31beta04 [July 11, 2017]
-  Removed one of the GCC-7.1.0 'strict-overflow' warnings that result when
-    integers appear on both sides of a compare.  Worked around the others by
-    forcing the strict-overflow setting in the relevant functions to a level
-    where they are not reported (John Bowler).
-  Changed "FALL THROUGH" comments to "FALLTHROUGH" because GCC doesn't like
-    the space.
-  Worked around some C-style casts from (void*) because g++ 5.4.0 objects
-    to them.
-  Increased the buffer size for 'sprint' to pass the gcc 7.1.0 'sprint
-    overflow' check that is on by default with -Wall -Wextra.
-
-Version 1.6.31beta05 [July 13, 2017]
-  Added eXIf chunk support.
-
-Version 1.6.31beta06 [July 17, 2017]
-  Added a minimal eXIf chunk (with Orientation and FocalLengthIn35mmFilm
-    tags) to pngtest.png.
-
-Version 1.6.31beta07 [July 18, 2017]
-  Revised the eXIf chunk in pngtest.png to fix "Bad IFD1 Directory" warning.
-
-Version 1.6.31rc01 [July 19, 2017]
-  No changes.
-
-Version 1.6.31rc02 [July 25, 2017]
-  Fixed typo in example.c (png_free_image should be png_image_free) (Bug
-    report by John Smith)
-
-Version 1.6.31 [July 27, 2017]
-  No changes.
-
-Version 1.6.32beta01 [July 31, 2017]
-  Avoid possible NULL dereference in png_handle_eXIf when benign_errors
-    are allowed. Avoid leaking the input buffer "eXIf_buf".
-  Eliminated png_ptr->num_exif member from pngstruct.h and added num_exif
-    to arguments for png_get_eXIf() and png_set_eXIf().
-  Added calls to png_handle_eXIf(() in pngread.c and png_write_eXIf() in
-    pngwrite.c, and made various other fixes to png_write_eXIf().
-  Changed name of png_get_eXIF and png_set_eXIf() to png_get_eXIf_1() and
-    png_set_eXIf_1(), respectively, to avoid breaking API compatibility
-    with libpng-1.6.31.
-
-Version 1.6.32beta02 [August 1, 2017]
-  Updated contrib/libtests/pngunknown.c with eXIf chunk.
-
-Version 1.6.32beta03 [August 2, 2017]
-  Initialized btoa[] in pngstest.c
-  Stop memory leak when returning from png_handle_eXIf() with an error
-    (Bug report from the OSS-fuzz project).
-
-Version 1.6.32beta04 [August 2, 2017]
-  Replaced local eXIf_buf with info_ptr->eXIf_buf in png_handle_eXIf().
-  Update libpng.3 and libpng-manual.txt about eXIf functions.
-
-Version 1.6.32beta05 [August 2, 2017]
-  Restored png_get_eXIf() and png_set_eXIf() to maintain API compatibility.
-
-Version 1.6.32beta06 [August 2, 2017]
-  Removed png_get_eXIf_1() and png_set_eXIf_1().
-
-Version 1.6.32beta07 [August 3, 2017]
-  Check length of all chunks except IDAT against user limit to fix an
-    OSS-fuzz issue (Fixes CVE-2017-12652).
-
-Version 1.6.32beta08 [August 3, 2017]
-  Check length of IDAT against maximum possible IDAT size, accounting
-    for height, rowbytes, interlacing and zlib/deflate overhead.
-  Restored png_get_eXIf_1() and png_set_eXIf_1(), because strlen(eXIf_buf)
-    does not work (the eXIf chunk data can contain zeroes).
-
-Version 1.6.32beta09 [August 3, 2017]
-  Require cmake-2.8.8 in CMakeLists.txt. Revised symlink creation,
-    no longer using deprecated cmake LOCATION feature (Clifford Yapp).
-  Fixed five-byte error in the calculation of IDAT maximum possible size.
-
-Version 1.6.32beta10 [August 5, 2017]
-  Moved chunk-length check into a png_check_chunk_length() private
-    function (Suggested by Max Stepin).
-  Moved bad pngs from tests to contrib/libtests/crashers
-  Moved testing of bad pngs into a separate tests/pngtest-badpngs script
-  Added the --xfail (expected FAIL) option to pngtest.c. It writes XFAIL
-    in the output but PASS for the libpng test.
-  Require cmake-3.0.2 in CMakeLists.txt (Clifford Yapp).
-  Fix "const" declaration info_ptr argument to png_get_eXIf_1() and the
-    num_exif argument to png_get_eXIf_1() (Github Issue 171).
-
-Version 1.6.32beta11 [August 7, 2017]
-  Added "eXIf" to "chunks_to_ignore[]" in png_set_keep_unknown_chunks().
-  Added huge_IDAT.png and empty_ancillary_chunks.png to testpngs/crashers.
-  Make pngtest --strict, --relax, --xfail options imply -m (multiple).
-  Removed unused chunk_name parameter from png_check_chunk_length().
-  Relocated setting free_me for eXIf data, to stop an OSS-fuzz leak.
-  Initialize profile_header[] in png_handle_iCCP() to fix OSS-fuzz issue.
-  Initialize png_ptr->row_buf[0] to 255 in png_read_row() to fix OSS-fuzz UMR.
-  Attempt to fix a UMR in png_set_text_2() to fix OSS-fuzz issue.
-  Increase minimum zlib stream from 9 to 14 in png_handle_iCCP(), to account
-    for the minimum 'deflate' stream, and relocate the test to a point
-    after the keyword has been read.
-  Check that the eXIf chunk has at least 2 bytes and begins with "II" or "MM".
-
-Version 1.6.32rc01 [August 18, 2017]
-  Added a set of "huge_xxxx_chunk.png" files to contrib/testpngs/crashers,
-    one for each known chunk type, with length = 2GB-1.
-  Check for 0 return from png_get_rowbytes() and added some (size_t) typecasts
-    in contrib/pngminus/*.c to stop some Coverity issues (162705, 162706,
-    and 162707).
-  Renamed chunks in contrib/testpngs/crashers to avoid having files whose
-    names differ only in case; this causes problems with some platforms
-    (github issue #172).
-
-Version 1.6.32rc02 [August 22, 2017]
-  Added contrib/oss-fuzz directory which contains files used by the oss-fuzz
-    project (https://github.com/google/oss-fuzz/tree/master/projects/libpng).
-
-Version 1.6.32 [August 24, 2017]
-  No changes.
-
-Version 1.6.33beta01 [August 28, 2017]
-  Added PNGMINUS_UNUSED macro to contrib/pngminus/p*.c and added missing
-    parenthesis in contrib/pngminus/pnm2png.c (bug report by Christian Hesse).
-  Fixed off-by-one error in png_do_check_palette_indexes() (Bug report
-    by Mick P., Source Forge Issue #269).
-
-Version 1.6.33beta02 [September 3, 2017]
-  Initialize png_handler.row_ptr in contrib/oss-fuzz/libpng_read_fuzzer.cc
-    to fix shortlived oss-fuzz issue 3234.
-  Compute a larger limit on IDAT because some applications write a deflate
-    buffer for each row (Bug report by Andrew Church).
-  Use current date (DATE) instead of release-date (RDATE) in last
-    changed date of contrib/oss-fuzz files.
-  Enabled ARM support in CMakeLists.txt (Bernd Kuhls).
-
-Version 1.6.33beta03 [September 14, 2017]
-  Fixed incorrect typecast of some arguments to png_malloc() and
-    png_calloc() that were png_uint_32 instead of png_alloc_size_t
-    (Bug report by "irwir" in Github libpng issue #175).
-  Use pnglibconf.h.prebuilt when building for ANDROID with cmake (Github
-    issue 162, by rcdailey).
-
-Version 1.6.33rc01 [September 20, 2017]
-  Initialize memory allocated by png_inflate to zero, using memset, to
-    stop an oss-fuzz "use of uninitialized value" detection in png_set_text_2()
-    due to truncated iTXt or zTXt chunk.
-  Initialize memory allocated by png_read_buffer to zero, using memset, to
-    stop an oss-fuzz "use of uninitialized value" detection in
-    png_icc_check_tag_table() due to truncated iCCP chunk.
-  Removed a redundant test (suggested by "irwir" in Github issue #180).
-
-Version 1.6.33rc02 [September 23, 2017]
-  Added an interlaced version of each file in contrib/pngsuite.
-  Relocate new memset() call in pngrutil.c.
-  Removed more redundant tests (suggested by "irwir" in Github issue #180).
-  Add support for loading images with associated alpha in the Simplified
-    API (Samuel Williams).
-
-Version 1.6.33 [September 28, 2017]
-  Revert contrib/oss-fuzz/libpng_read_fuzzer.cc to libpng-1.6.32 state.
-  Initialize png_handler.row_ptr in contrib/oss-fuzz/libpng_read_fuzzer.cc
-  Add end_info structure and png_read_end() to the libpng fuzzer.
-
-Version 1.6.34 [September 29, 2017]
-  Removed contrib/pngsuite/i*.png; some of them caused test failures.
-
-Version 1.6.35beta01 [March 6, 2018]
-  Restored 21 of the contrib/pngsuite/i*.png, which do not cause test
-    failures. Placed the remainder in contrib/pngsuite/interlaced/i*.png.
-  Added calls to png_set_*() transforms commonly used by browsers to
-    the fuzzer.
-  Removed some unnecessary brackets in pngrtran.c
-  Fixed miscellaneous typos (Patch by github user "luzpaz").
-  Change "ASM C" to "C ASM" in CMakeLists.txt
-  Fixed incorrect handling of bKGD chunk in sub-8-bit files (Cosmin)
-  Added hardware optimization directories to zip and 7z distributions.
-  Fixed incorrect bitmask for options.
-  Fixed many spelling typos.
-
-Version 1.6.35beta02 [March 28, 2018]
-  Make png_get_iCCP consistent with man page (allow compression-type argument
-  to be NULL, bug report by Lenard Szolnoki).
-
-Version 1.6.35 [July 15, 2018]
-  Replaced the remaining uses of png_size_t with size_t (Cosmin)
-  Fixed the calculation of row_factor in png_check_chunk_length
-    (reported by Thuan Pham in SourceForge issue #278)
-  Added missing parentheses to a macro definition
-    (suggested by "irwir" in GitHub issue #216)
-
-Version 1.6.36 [December 1, 2018]
-  Optimized png_do_expand_palette for ARM processors.
-  Improved performance by around 10-22% on a recent ARM Chromebook.
-    (Contributed by Richard Townsend, ARM Holdings)
-  Fixed manipulation of machine-specific optimization options.
-    (Contributed by Vicki Pfau)
-  Used memcpy instead of manual pointer arithmetic on Intel SSE2.
-    (Contributed by Samuel Williams)
-  Fixed build errors with MSVC on ARM64.
-    (Contributed by Zhijie Liang)
-  Fixed detection of libm in CMakeLists.
-    (Contributed by Cameron Cawley)
-  Fixed incorrect creation of pkg-config file in CMakeLists.
-    (Contributed by Kyle Bentley)
-  Fixed the CMake build on Windows MSYS by avoiding symlinks.
-  Fixed a build warning on OpenBSD.
-    (Contributed by Theo Buehler)
-  Fixed various typos in comments.
-    (Contributed by "luz.paz")
-  Raised the minimum required CMake version from 3.0.2 to 3.1.
-  Removed yet more of the vestigial support for pre-ANSI C compilers.
-  Removed ancient makefiles for ancient systems that have been broken
-    across all previous libpng-1.6.x versions.
-  Removed the Y2K compliance statement and the export control
-    information.
-  Applied various code style and documentation fixes.
-
-Version 1.6.37 [April 14, 2019]
-  Fixed a use-after-free vulnerability (CVE-2019-7317) in png_image_free.
-  Fixed a memory leak in the ARM NEON implementation of png_do_expand_palette.
-  Fixed a memory leak in pngtest.c.
-  Fixed two vulnerabilities (CVE-2018-14048, CVE-2018-14550) in
-    contrib/pngminus; refactor.
-  Changed the license of contrib/pngminus to MIT; refresh makefile and docs.
-    (Contributed by Willem van Schaik)
-  Fixed a typo in the libpng license v2.
-    (Contributed by Miguel Ojeda)
-  Added makefiles for AddressSanitizer-enabled builds.
-  Cleaned up various makefiles.
-
-Version 1.6.38 [September 14, 2022]
-  Added configurations and scripts for continuous integration.
-  Fixed various errors in the handling of tRNS, hIST and eXIf.
-  Implemented many stability improvements across all platforms.
-  Updated the internal documentation.
-
-Version 1.6.39 [November 20, 2022]
-  Changed the error handler of oversized chunks (i.e. larger than
-    PNG_USER_CHUNK_MALLOC_MAX) from png_chunk_error to png_benign_error.
-  Fixed a buffer overflow error in contrib/tools/pngfix.
-  Fixed a memory leak (CVE-2019-6129) in contrib/tools/pngcp.
-  Disabled the ARM Neon optimizations by default in the CMake file,
-    following the default behavior of the configure script.
-  Allowed configure.ac to work with the trunk version of autoconf.
-  Removed the support for "install" targets from the legacy makefiles;
-    removed the obsolete makefile.cegcc.
-  Cleaned up the code and updated the internal documentation.
-
-Version 1.6.40 [June 21, 2023]
-  Fixed the eXIf chunk multiplicity checks.
-  Fixed a memory leak in pCAL processing.
-  Corrected the validity report about tRNS inside png_get_valid().
-  Fixed various build issues on *BSD, Mac and Windows.
-  Updated the configurations and the scripts for continuous integration.
-  Cleaned up the code, the build scripts, and the documentation.
-
-Version 1.6.41 [January 24, 2024]
-  Added SIMD-optimized code for the LoongArch LSX hardware.
-    (Contributed by GuXiWei, JinBo and ZhangLixia)
-  Fixed the run-time discovery of MIPS MSA hardware.
-    (Contributed by Sui Jingfeng)
-  Fixed an off-by-one error in the function png_do_check_palette_indexes(),
-    which failed to recognize errors that might have existed in the first
-    column of a broken palette-encoded image. This was a benign regression
-    accidentally introduced in libpng-1.6.33. No pixel was harmed.
-    (Contributed by Adam Richter; reviewed by John Bowler)
-  Fixed, improved and modernized the contrib/pngminus programs, i.e.,
-    png2pnm.c and pnm2png.c
-  Removed old and peculiar portability hacks that were meant to silence
-    warnings issued by gcc version 7.1 alone.
-    (Contributed by John Bowler)
-  Fixed and modernized the CMake file, and raised the minimum required
-    CMake version from 3.1 to 3.6.
-    (Contributed by Clinton Ingram, Timothy Lyanguzov, Tyler Kropp, et al.)
-  Allowed the configure script to disable the building of auxiliary tools
-    and tests, thus catching up with the CMake file.
-    (Contributed by Carlo Bramini)
-  Fixed a build issue on Mac.
-    (Contributed by Zixu Wang)
-  Moved the Autoconf macro files to scripts/autoconf.
-  Moved the CMake files (except for the main CMakeLists.txt) to
-    scripts/cmake and moved the list of their contributing authors to
-    scripts/cmake/AUTHORS.md
-  Updated the CI configurations and scripts.
-  Relicensed the CI scripts to the MIT License.
-  Improved the test coverage.
-    (Contributed by John Bowler)
-
-Version 1.6.42 [January 29, 2024]
-  Fixed the implementation of the macro function png_check_sig().
-    This was an API regression, introduced in libpng-1.6.41.
-    (Reported by Matthieu Darbois)
-  Fixed and updated the libpng manual.
-
-Version 1.6.43 [February 23, 2024]
-  Fixed the row width check in png_check_IHDR().
-    This corrected a bug that was specific to the 16-bit platforms,
-    and removed a spurious compiler warning from the 64-bit builds.
-    (Reported by Jacek Caban; fixed by John Bowler)
-  Added eXIf chunk support to the push-mode reader in pngpread.c.
-    (Contributed by Chris Blume)
-  Added contrib/pngexif for the benefit of the users who would like
-    to inspect the content of eXIf chunks.
-  Added contrib/conftest/basic.dfa, a basic build-time configuration.
-    (Contributed by John Bowler)
-  Fixed a preprocessor condition in pngread.c that broke build-time
-    configurations like contrib/conftest/pngcp.dfa.
-    (Contributed by John Bowler)
-  Added CMake build support for LoongArch LSX.
-    (Contributed by GuXiWei)
-  Fixed a CMake build error that occurred under a peculiar state of the
-    dependency tree. This was a regression introduced in libpng-1.6.41.
-    (Contributed by Dan Rosser)
-  Marked the installed libpng headers as system headers in CMake.
-    (Contributed by Benjamin Buch)
-  Updated the build support for RISCOS.
-    (Contributed by Cameron Cawley)
-  Updated the makefiles to allow cross-platform builds to initialize
-    conventional make variables like AR and ARFLAGS.
-  Added various improvements to the CI scripts in areas like version
-    consistency verification and text linting.
-  Added version consistency verification to pngtest.c also.
-
-Send comments/corrections/commendations to png-mng-implement at lists.sf.net.
-Subscription is required; visit
-https://lists.sourceforge.net/lists/listinfo/png-mng-implement
-to subscribe.
diff --git a/dep/libpng/CMakeLists.txt b/dep/libpng/CMakeLists.txt
deleted file mode 100644
index 82b01bfb8..000000000
--- a/dep/libpng/CMakeLists.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-add_library(png
-  src/png.c
-  src/pngerror.c
-  src/pngget.c
-  src/pngmem.c
-  src/pngpread.c
-  src/pngread.c
-  src/pngrio.c
-  src/pngrtran.c
-  src/pngrutil.c
-  src/pngset.c
-  src/pngtrans.c
-  src/pngwio.c
-  src/pngwrite.c
-  src/pngwtran.c
-  src/pngwutil.c
-)
-
-if(CPU_ARCH_X64)
-  target_sources(png PRIVATE
-    src/intel/filter_sse2_intrinsics.c
-    src/intel/intel_init.c
-  )
-  target_compile_definitions(png PRIVATE "PNG_INTEL_SSE")
-elseif(CPU_ARCH_ARM32 OR CPU_ARCH_ARM64)
-  target_sources(png PRIVATE
-    src/arm/arm_init.c
-    src/arm/filter_neon.S
-    src/arm/filter_neon_intrinsics.c
-    src/arm/palette_neon_intrinsics.c
-  )
-endif()
-
-target_link_libraries(png PRIVATE ZLIB::ZLIB)
-target_include_directories(png PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_include_directories(png PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
-disable_compiler_warnings_for_target(png)
-
-add_library(PNG::PNG ALIAS png)
diff --git a/dep/libpng/LICENSE b/dep/libpng/LICENSE
deleted file mode 100644
index 25f298f0f..000000000
--- a/dep/libpng/LICENSE
+++ /dev/null
@@ -1,134 +0,0 @@
-COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
-=========================================
-
-PNG Reference Library License version 2
----------------------------------------
-
- * Copyright (c) 1995-2024 The PNG Reference Library Authors.
- * Copyright (c) 2018-2024 Cosmin Truta.
- * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
- * Copyright (c) 1996-1997 Andreas Dilger.
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
-
-The software is supplied "as is", without warranty of any kind,
-express or implied, including, without limitation, the warranties
-of merchantability, fitness for a particular purpose, title, and
-non-infringement.  In no event shall the Copyright owners, or
-anyone distributing the software, be liable for any damages or
-other liability, whether in contract, tort or otherwise, arising
-from, out of, or in connection with the software, or the use or
-other dealings in the software, even if advised of the possibility
-of such damage.
-
-Permission is hereby granted to use, copy, modify, and distribute
-this software, or portions hereof, for any purpose, without fee,
-subject to the following restrictions:
-
- 1. The origin of this software must not be misrepresented; you
-    must not claim that you wrote the original software.  If you
-    use this software in a product, an acknowledgment in the product
-    documentation would be appreciated, but is not required.
-
- 2. Altered source versions must be plainly marked as such, and must
-    not be misrepresented as being the original software.
-
- 3. This Copyright notice may not be removed or altered from any
-    source or altered source distribution.
-
-
-PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
------------------------------------------------------------------------
-
-libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
-Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
-derived from libpng-1.0.6, and are distributed according to the same
-disclaimer and license as libpng-1.0.6 with the following individuals
-added to the list of Contributing Authors:
-
-    Simon-Pierre Cadieux
-    Eric S. Raymond
-    Mans Rullgard
-    Cosmin Truta
-    Gilles Vollant
-    James Yu
-    Mandar Sahastrabuddhe
-    Google Inc.
-    Vadim Barkov
-
-and with the following additions to the disclaimer:
-
-    There is no warranty against interference with your enjoyment of
-    the library or against infringement.  There is no warranty that our
-    efforts or the library will fulfill any of your particular purposes
-    or needs.  This library is provided with all faults, and the entire
-    risk of satisfactory quality, performance, accuracy, and effort is
-    with the user.
-
-Some files in the "contrib" directory and some configure-generated
-files that are distributed with libpng have other copyright owners, and
-are released under other open source licenses.
-
-libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
-Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from
-libpng-0.96, and are distributed according to the same disclaimer and
-license as libpng-0.96, with the following individuals added to the
-list of Contributing Authors:
-
-    Tom Lane
-    Glenn Randers-Pehrson
-    Willem van Schaik
-
-libpng versions 0.89, June 1996, through 0.96, May 1997, are
-Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88,
-and are distributed according to the same disclaimer and license as
-libpng-0.88, with the following individuals added to the list of
-Contributing Authors:
-
-    John Bowler
-    Kevin Bracey
-    Sam Bushell
-    Magnus Holmgren
-    Greg Roelofs
-    Tom Tanner
-
-Some files in the "scripts" directory have other copyright owners,
-but are released under this license.
-
-libpng versions 0.5, May 1995, through 0.88, January 1996, are
-Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
-
-For the purposes of this copyright and license, "Contributing Authors"
-is defined as the following set of individuals:
-
-    Andreas Dilger
-    Dave Martindale
-    Guy Eric Schalnat
-    Paul Schmidt
-    Tim Wegner
-
-The PNG Reference Library is supplied "AS IS".  The Contributing
-Authors and Group 42, Inc. disclaim all warranties, expressed or
-implied, including, without limitation, the warranties of
-merchantability and of fitness for any purpose.  The Contributing
-Authors and Group 42, Inc. assume no liability for direct, indirect,
-incidental, special, exemplary, or consequential damages, which may
-result from the use of the PNG Reference Library, even if advised of
-the possibility of such damage.
-
-Permission is hereby granted to use, copy, modify, and distribute this
-source code, or portions hereof, for any purpose, without fee, subject
-to the following restrictions:
-
- 1. The origin of this source code must not be misrepresented.
-
- 2. Altered versions must be plainly marked as such and must not
-    be misrepresented as being the original source.
-
- 3. This Copyright notice may not be removed or altered from any
-    source or altered source distribution.
-
-The Contributing Authors and Group 42, Inc. specifically permit,
-without fee, and encourage the use of this source code as a component
-to supporting the PNG file format in commercial products.  If you use
-this source code in a product, acknowledgment is not required but would
-be appreciated.
diff --git a/dep/libpng/README b/dep/libpng/README
deleted file mode 100644
index a6ca3ae9f..000000000
--- a/dep/libpng/README
+++ /dev/null
@@ -1,184 +0,0 @@
-README for libpng version 1.6.43
-================================
-
-See the note about version numbers near the top of `png.h`.
-See `INSTALL` for instructions on how to install libpng.
-
-Libpng comes in several distribution formats.  Get `libpng-*.tar.gz`
-or `libpng-*.tar.xz` if you want UNIX-style line endings in the text
-files, or `lpng*.7z` or `lpng*.zip` if you want DOS-style line endings.
-
-For a detailed description on using libpng, read `libpng-manual.txt`.
-For examples of libpng in a program, see `example.c` and `pngtest.c`.
-For usage information and restrictions (what little they are) on libpng,
-see `png.h`.  For a description on using zlib (the compression library
-used by libpng) and zlib's restrictions, see `zlib.h`.
-
-You should use zlib 1.0.4 or later to run this, but it _may_ work with
-versions as old as zlib 0.95.  Even so, there are bugs in older zlib
-versions which can cause the output of invalid compression streams for
-some images.
-
-You should also note that zlib is a compression library that is useful
-for more things than just PNG files.  You can use zlib as a drop-in
-replacement for `fread()` and `fwrite()`, if you are so inclined.
-
-zlib should be available at the same place that libpng is, or at
-https://zlib.net .
-
-You may also want a copy of the PNG specification.  It is available
-as an RFC, a W3C Recommendation, and an ISO/IEC Standard.  You can find
-these at http://www.libpng.org/pub/png/pngdocs.html .
-
-This code is currently being archived at https://libpng.sourceforge.io
-in the download area, and at http://libpng.download/src .
-
-This release, based in a large way on Glenn's, Guy's and Andreas'
-earlier work, was created and will be supported by myself and the PNG
-development group.
-
-Send comments, corrections and commendations to `png-mng-implement`
-at `lists.sourceforge.net`.  (Subscription is required; visit
-https://lists.sourceforge.net/lists/listinfo/png-mng-implement
-to subscribe.)
-
-Send general questions about the PNG specification to `png-mng-misc`
-at `lists.sourceforge.net`.  (Subscription is required; visit
-https://lists.sourceforge.net/lists/listinfo/png-mng-misc
-to subscribe.)
-
-Historical notes
-----------------
-
-The libpng library has been in extensive use and testing since mid-1995.
-Version 0.89, published a year later, was the first official release.
-By late 1997, it had finally gotten to the stage where there hadn't
-been significant changes to the API in some time, and people have a bad
-feeling about libraries with versions below 1.0.  Version 1.0.0 was
-released in March 1998.
-
-Note that some of the changes to the `png_info` structure render this
-version of the library binary incompatible with libpng-0.89 or
-earlier versions if you are using a shared library.  The type of the
-`filler` parameter for `png_set_filler()` has changed from `png_byte`
-to `png_uint_32`, which will affect shared-library applications that
-use this function.
-
-To avoid problems with changes to the internals of the `info_struct`,
-new APIs have been made available in 0.95 to avoid direct application
-access to `info_ptr`.  These functions are the `png_set_<chunk>` and
-`png_get_<chunk>` functions.  These functions should be used when
-accessing/storing the `info_struct` data, rather than manipulating it
-directly, to avoid such problems in the future.
-
-It is important to note that the APIs did not make current programs
-that access the info struct directly incompatible with the new
-library, through libpng-1.2.x.  In libpng-1.4.x, which was meant to
-be a transitional release, members of the `png_struct` and the
-`info_struct` can still be accessed, but the compiler will issue a
-warning about deprecated usage.  Since libpng-1.5.0, direct access
-to these structs is not allowed, and the definitions of the structs
-reside in private `pngstruct.h` and `pnginfo.h` header files that are
-not accessible to applications.  It is strongly suggested that new
-programs use the new APIs (as shown in `example.c` and `pngtest.c`),
-and older programs be converted to the new format, to facilitate
-upgrades in the future.
-
-The additions since 0.89 include the ability to read from a PNG stream
-which has had some (or all) of the signature bytes read by the calling
-application.  This also allows the reading of embedded PNG streams that
-do not have the PNG file signature.  As well, it is now possible to set
-the library action on the detection of chunk CRC errors.  It is possible
-to set different actions based on whether the CRC error occurred in a
-critical or an ancillary chunk.
-
-The additions since 0.90 include the ability to compile libpng as a
-Windows DLL, and new APIs for accessing data in the `info_struct`.
-Experimental functions included the ability to set weighting and cost
-factors for row filter selection, direct reads of integers from buffers
-on big-endian processors that support misaligned data access, faster
-methods of doing alpha composition, and more accurate 16-to-8 bit color
-conversion.  Some of these experimental functions, such as the weighted
-filter heuristics, have since been removed.
-
-Files included in this distribution
------------------------------------
-
-    ANNOUNCE      =>  Announcement of this version, with recent changes
-    AUTHORS       =>  List of contributing authors
-    CHANGES       =>  Description of changes between libpng versions
-    INSTALL       =>  Instructions to install libpng
-    LICENSE       =>  License to use and redistribute libpng
-    README        =>  This file
-    TODO          =>  Things not implemented in the current library
-    TRADEMARK     =>  Trademark information
-    example.c     =>  Example code for using libpng functions
-    libpng.3      =>  Manual page for libpng (includes libpng-manual.txt)
-    libpng-manual.txt  =>  Description of libpng and its functions
-    libpngpf.3    =>  Manual page for libpng's private functions (deprecated)
-    png.5         =>  Manual page for the PNG format
-    png.c         =>  Basic interface functions common to library
-    png.h         =>  Library function and interface declarations (public)
-    pngpriv.h     =>  Library function and interface declarations (private)
-    pngconf.h     =>  System specific library configuration (public)
-    pngstruct.h   =>  png_struct declaration (private)
-    pnginfo.h     =>  png_info struct declaration (private)
-    pngdebug.h    =>  debugging macros (private)
-    pngerror.c    =>  Error/warning message I/O functions
-    pngget.c      =>  Functions for retrieving info from struct
-    pngmem.c      =>  Memory handling functions
-    pngbar.png    =>  PNG logo, 88x31
-    pngnow.png    =>  PNG logo, 98x31
-    pngpread.c    =>  Progressive reading functions
-    pngread.c     =>  Read data/helper high-level functions
-    pngrio.c      =>  Lowest-level data read I/O functions
-    pngrtran.c    =>  Read data transformation functions
-    pngrutil.c    =>  Read data utility functions
-    pngset.c      =>  Functions for storing data into the info_struct
-    pngtest.c     =>  Library test program
-    pngtest.png   =>  Library test sample image
-    pngtrans.c    =>  Common data transformation functions
-    pngwio.c      =>  Lowest-level write I/O functions
-    pngwrite.c    =>  High-level write functions
-    pngwtran.c    =>  Write data transformations
-    pngwutil.c    =>  Write utility functions
-    arm/          =>  Optimized code for ARM Neon
-    intel/        =>  Optimized code for INTEL SSE2
-    loongarch/    =>  Optimized code for LoongArch LSX
-    mips/         =>  Optimized code for MIPS MSA and MIPS MMI
-    powerpc/      =>  Optimized code for PowerPC VSX
-    ci/           =>  Scripts for continuous integration
-    contrib/      =>  External contributions
-        arm-neon/     =>  Optimized code for the ARM-NEON platform
-        mips-msa/     =>  Optimized code for the MIPS-MSA platform
-        powerpc-vsx/  =>  Optimized code for the POWERPC-VSX platform
-        examples/     =>  Examples of libpng usage
-        gregbook/     =>  Source code for PNG reading and writing, from
-                          "PNG: The Definitive Guide" by Greg Roelofs,
-                          O'Reilly, 1999
-        libtests/     =>  Test programs
-        oss-fuzz/     =>  Files used by the OSS-Fuzz project for fuzz-testing
-                          libpng
-        pngexif/      =>  Program to inspect the EXIF information in PNG files
-        pngminim/     =>  Minimal decoder, encoder, and progressive decoder
-                          programs demonstrating the use of pngusr.dfa
-        pngminus/     =>  Simple pnm2png and png2pnm programs
-        pngsuite/     =>  Test images
-        testpngs/     =>  Test images
-        tools/        =>  Various tools
-        visupng/      =>  VisualPng, a Windows viewer for PNG images
-    projects/     =>  Project files and workspaces for various IDEs
-        owatcom/      =>  OpenWatcom project
-        visualc71/    =>  Microsoft Visual C++ 7.1 workspace
-        vstudio/      =>  Microsoft Visual Studio workspace
-    scripts/      =>  Scripts and makefiles for building libpng
-                      (see scripts/README.txt for the complete list)
-    tests/        =>  Test scripts
-
-Good luck, and happy coding!
-
- * Cosmin Truta (current maintainer, since 2018)
- * Glenn Randers-Pehrson (former maintainer, 1998-2018)
- * Andreas Eric Dilger (former maintainer, 1996-1997)
- * Guy Eric Schalnat (original author and former maintainer, 1995-1996)
-   (formerly of Group 42, Inc.)
diff --git a/dep/libpng/TODO b/dep/libpng/TODO
deleted file mode 100644
index 562dab069..000000000
--- a/dep/libpng/TODO
+++ /dev/null
@@ -1,23 +0,0 @@
-TODO - list of things to do for libpng:
-
-* Fix all defects (duh!)
-* Better C++ wrapper / full C++ implementation (?)
-* Fix the problems with C++ and 'extern "C"'.
-* cHRM transformation.
-* Palette creation.
-* "grayscale->palette" transformation and "palette->grayscale" detection.
-* Improved dithering.
-* Multi-lingual error and warning message support.
-* Complete sRGB transformation.  (Currently it simply uses gamma=0.45455.)
-* Man pages for function calls.
-* Better documentation.
-* Better filter selection
-  (e.g., counting huffman bits/precompression; filter inertia; filter costs).
-* Histogram creation.
-* Text conversion between different code pages (e.g., Latin-1 -> Mac).
-* Avoid building gamma tables whenever possible.
-* Greater precision in changing to linear gamma for compositing against
-  background, and in doing rgb-to-gray transformations.
-* Investigate pre-incremented loop counters and other loop constructions.
-* Interpolated method of handling interlacing.
-* More validations for libpng transformations.
diff --git a/dep/libpng/TRADEMARK b/dep/libpng/TRADEMARK
deleted file mode 100644
index ac667187d..000000000
--- a/dep/libpng/TRADEMARK
+++ /dev/null
@@ -1,8 +0,0 @@
-TRADEMARK
-=========
-
-The name "libpng" has not been registered by the Copyright owners
-as a trademark in any jurisdiction.  However, because libpng has
-been distributed and maintained world-wide, continually since 1995,
-the Copyright owners claim "common-law trademark protection" in any
-jurisdiction where common-law trademark is recognized.
diff --git a/dep/libpng/include/png.h b/dep/libpng/include/png.h
deleted file mode 100644
index 83d390312..000000000
--- a/dep/libpng/include/png.h
+++ /dev/null
@@ -1,3250 +0,0 @@
-
-/* png.h - header file for PNG reference library
- *
- * libpng version 1.6.43
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license. (See LICENSE, below.)
- *
- * Authors and maintainers:
- *   libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
- *   libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger
- *   libpng versions 0.97, January 1998, through 1.6.35, July 2018:
- *     Glenn Randers-Pehrson
- *   libpng versions 1.6.36, December 2018, through 1.6.43, February 2024:
- *     Cosmin Truta
- *   See also "Contributing Authors", below.
- */
-
-/*
- * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
- * =========================================
- *
- * PNG Reference Library License version 2
- * ---------------------------------------
- *
- *  * Copyright (c) 1995-2024 The PNG Reference Library Authors.
- *  * Copyright (c) 2018-2024 Cosmin Truta.
- *  * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
- *  * Copyright (c) 1996-1997 Andreas Dilger.
- *  * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * The software is supplied "as is", without warranty of any kind,
- * express or implied, including, without limitation, the warranties
- * of merchantability, fitness for a particular purpose, title, and
- * non-infringement.  In no event shall the Copyright owners, or
- * anyone distributing the software, be liable for any damages or
- * other liability, whether in contract, tort or otherwise, arising
- * from, out of, or in connection with the software, or the use or
- * other dealings in the software, even if advised of the possibility
- * of such damage.
- *
- * Permission is hereby granted to use, copy, modify, and distribute
- * this software, or portions hereof, for any purpose, without fee,
- * subject to the following restrictions:
- *
- *  1. The origin of this software must not be misrepresented; you
- *     must not claim that you wrote the original software.  If you
- *     use this software in a product, an acknowledgment in the product
- *     documentation would be appreciated, but is not required.
- *
- *  2. Altered source versions must be plainly marked as such, and must
- *     not be misrepresented as being the original software.
- *
- *  3. This Copyright notice may not be removed or altered from any
- *     source or altered source distribution.
- *
- *
- * PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
- * -----------------------------------------------------------------------
- *
- * libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
- * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
- * derived from libpng-1.0.6, and are distributed according to the same
- * disclaimer and license as libpng-1.0.6 with the following individuals
- * added to the list of Contributing Authors:
- *
- *     Simon-Pierre Cadieux
- *     Eric S. Raymond
- *     Mans Rullgard
- *     Cosmin Truta
- *     Gilles Vollant
- *     James Yu
- *     Mandar Sahastrabuddhe
- *     Google Inc.
- *     Vadim Barkov
- *
- * and with the following additions to the disclaimer:
- *
- *     There is no warranty against interference with your enjoyment of
- *     the library or against infringement.  There is no warranty that our
- *     efforts or the library will fulfill any of your particular purposes
- *     or needs.  This library is provided with all faults, and the entire
- *     risk of satisfactory quality, performance, accuracy, and effort is
- *     with the user.
- *
- * Some files in the "contrib" directory and some configure-generated
- * files that are distributed with libpng have other copyright owners, and
- * are released under other open source licenses.
- *
- * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
- * Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from
- * libpng-0.96, and are distributed according to the same disclaimer and
- * license as libpng-0.96, with the following individuals added to the
- * list of Contributing Authors:
- *
- *     Tom Lane
- *     Glenn Randers-Pehrson
- *     Willem van Schaik
- *
- * libpng versions 0.89, June 1996, through 0.96, May 1997, are
- * Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88,
- * and are distributed according to the same disclaimer and license as
- * libpng-0.88, with the following individuals added to the list of
- * Contributing Authors:
- *
- *     John Bowler
- *     Kevin Bracey
- *     Sam Bushell
- *     Magnus Holmgren
- *     Greg Roelofs
- *     Tom Tanner
- *
- * Some files in the "scripts" directory have other copyright owners,
- * but are released under this license.
- *
- * libpng versions 0.5, May 1995, through 0.88, January 1996, are
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * For the purposes of this copyright and license, "Contributing Authors"
- * is defined as the following set of individuals:
- *
- *     Andreas Dilger
- *     Dave Martindale
- *     Guy Eric Schalnat
- *     Paul Schmidt
- *     Tim Wegner
- *
- * The PNG Reference Library is supplied "AS IS".  The Contributing
- * Authors and Group 42, Inc. disclaim all warranties, expressed or
- * implied, including, without limitation, the warranties of
- * merchantability and of fitness for any purpose.  The Contributing
- * Authors and Group 42, Inc. assume no liability for direct, indirect,
- * incidental, special, exemplary, or consequential damages, which may
- * result from the use of the PNG Reference Library, even if advised of
- * the possibility of such damage.
- *
- * Permission is hereby granted to use, copy, modify, and distribute this
- * source code, or portions hereof, for any purpose, without fee, subject
- * to the following restrictions:
- *
- *  1. The origin of this source code must not be misrepresented.
- *
- *  2. Altered versions must be plainly marked as such and must not
- *     be misrepresented as being the original source.
- *
- *  3. This Copyright notice may not be removed or altered from any
- *     source or altered source distribution.
- *
- * The Contributing Authors and Group 42, Inc. specifically permit,
- * without fee, and encourage the use of this source code as a component
- * to supporting the PNG file format in commercial products.  If you use
- * this source code in a product, acknowledgment is not required but would
- * be appreciated.
- *
- * END OF COPYRIGHT NOTICE, DISCLAIMER, and LICENSE.
- *
- * TRADEMARK
- * =========
- *
- * The name "libpng" has not been registered by the Copyright owners
- * as a trademark in any jurisdiction.  However, because libpng has
- * been distributed and maintained world-wide, continually since 1995,
- * the Copyright owners claim "common-law trademark protection" in any
- * jurisdiction where common-law trademark is recognized.
- */
-
-/*
- * A "png_get_copyright" function is available, for convenient use in "about"
- * boxes and the like:
- *
- *    printf("%s", png_get_copyright(NULL));
- *
- * Also, the PNG logo (in PNG format, of course) is supplied in the
- * files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31).
- */
-
-/*
- * The contributing authors would like to thank all those who helped
- * with testing, bug fixes, and patience.  This wouldn't have been
- * possible without all of you.
- *
- * Thanks to Frank J. T. Wojcik for helping with the documentation.
- */
-
-/* Note about libpng version numbers:
- *
- *    Due to various miscommunications, unforeseen code incompatibilities
- *    and occasional factors outside the authors' control, version numbering
- *    on the library has not always been consistent and straightforward.
- *    The following table summarizes matters since version 0.89c, which was
- *    the first widely used release:
- *
- *    source                 png.h  png.h  shared-lib
- *    version                string   int  version
- *    -------                ------ -----  ----------
- *    0.89c "1.0 beta 3"     0.89      89  1.0.89
- *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
- *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
- *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
- *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
- *    0.97c                  0.97      97  2.0.97
- *    0.98                   0.98      98  2.0.98
- *    0.99                   0.99      98  2.0.99
- *    0.99a-m                0.99      99  2.0.99
- *    1.00                   1.00     100  2.1.0 [100 should be 10000]
- *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
- *    1.0.1       png.h string is   10001  2.1.0
- *    1.0.1a-e    identical to the  10002  from here on, the shared library
- *    1.0.2       source version)   10002  is 2.V where V is the source code
- *    1.0.2a-b                      10003  version, except as noted.
- *    1.0.3                         10003
- *    1.0.3a-d                      10004
- *    1.0.4                         10004
- *    1.0.4a-f                      10005
- *    1.0.5 (+ 2 patches)           10005
- *    1.0.5a-d                      10006
- *    1.0.5e-r                      10100 (not source compatible)
- *    1.0.5s-v                      10006 (not binary compatible)
- *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
- *    1.0.6d-f                      10007 (still binary incompatible)
- *    1.0.6g                        10007
- *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
- *    1.0.6i                        10007  10.6i
- *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
- *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
- *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
- *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
- *    1.0.7                    1    10007  (still compatible)
- *    ...
- *    1.0.69                  10    10069  10.so.0.69[.0]
- *    ...
- *    1.2.59                  13    10259  12.so.0.59[.0]
- *    ...
- *    1.4.20                  14    10420  14.so.0.20[.0]
- *    ...
- *    1.5.30                  15    10530  15.so.15.30[.0]
- *    ...
- *    1.6.43                  16    10643  16.so.16.43[.0]
- *
- *    Henceforth the source version will match the shared-library major and
- *    minor numbers; the shared-library major version number will be used for
- *    changes in backward compatibility, as it is intended.
- *    The PNG_LIBPNG_VER macro, which is not used within libpng but is
- *    available for applications, is an unsigned integer of the form XYYZZ
- *    corresponding to the source version X.Y.Z (leading zeros in Y and Z).
- *    Beta versions were given the previous public release number plus a
- *    letter, until version 1.0.6j; from then on they were given the upcoming
- *    public release number plus "betaNN" or "rcNN".
- *
- *    Binary incompatibility exists only when applications make direct access
- *    to the info_ptr or png_ptr members through png.h, and the compiled
- *    application is loaded with a different version of the library.
- *
- * See libpng.txt or libpng.3 for more information.  The PNG specification
- * is available as a W3C Recommendation and as an ISO/IEC Standard; see
- * <https://www.w3.org/TR/2003/REC-PNG-20031110/>
- */
-
-#ifndef PNG_H
-#define PNG_H
-
-/* This is not the place to learn how to use libpng. The file libpng-manual.txt
- * describes how to use libpng, and the file example.c summarizes it
- * with some code on which to build.  This file is useful for looking
- * at the actual function definitions and structure components.  If that
- * file has been stripped from your copy of libpng, you can find it at
- * <http://www.libpng.org/pub/png/libpng-manual.txt>
- *
- * If you just need to read a PNG file and don't want to read the documentation
- * skip to the end of this file and read the section entitled 'simplified API'.
- */
-
-/* Version information for png.h - this should match the version in png.c */
-#define PNG_LIBPNG_VER_STRING "1.6.43"
-#define PNG_HEADER_VERSION_STRING " libpng version " PNG_LIBPNG_VER_STRING "\n"
-
-/* The versions of shared library builds should stay in sync, going forward */
-#define PNG_LIBPNG_VER_SHAREDLIB 16
-#define PNG_LIBPNG_VER_SONUM     PNG_LIBPNG_VER_SHAREDLIB /* [Deprecated] */
-#define PNG_LIBPNG_VER_DLLNUM    PNG_LIBPNG_VER_SHAREDLIB /* [Deprecated] */
-
-/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
-#define PNG_LIBPNG_VER_MAJOR   1
-#define PNG_LIBPNG_VER_MINOR   6
-#define PNG_LIBPNG_VER_RELEASE 43
-
-/* This should be zero for a public release, or non-zero for a
- * development version.
- */
-#define PNG_LIBPNG_VER_BUILD  0
-
-/* Release Status */
-#define PNG_LIBPNG_BUILD_ALPHA    1
-#define PNG_LIBPNG_BUILD_BETA     2
-#define PNG_LIBPNG_BUILD_RC       3
-#define PNG_LIBPNG_BUILD_STABLE   4
-#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7
-
-/* Release-Specific Flags */
-#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
-                                       PNG_LIBPNG_BUILD_STABLE only */
-#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
-                                       PNG_LIBPNG_BUILD_SPECIAL */
-#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
-                                       PNG_LIBPNG_BUILD_PRIVATE */
-
-#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
-
-/* Careful here.  At one time, Guy wanted to use 082, but that
- * would be octal.  We must not include leading zeros.
- * Versions 0.7 through 1.0.0 were in the range 0 to 100 here
- * (only version 1.0.0 was mis-numbered 100 instead of 10000).
- * From version 1.0.1 it is:
- * XXYYZZ, where XX=major, YY=minor, ZZ=release
- */
-#define PNG_LIBPNG_VER 10643 /* 1.6.43 */
-
-/* Library configuration: these options cannot be changed after
- * the library has been built.
- */
-#ifndef PNGLCONF_H
-/* If pnglibconf.h is missing, you can
- * copy scripts/pnglibconf.h.prebuilt to pnglibconf.h
- */
-#   include "pnglibconf.h"
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-/* Machine specific configuration. */
-#  include "pngconf.h"
-#endif
-
-/*
- * Added at libpng-1.2.8
- *
- * Ref MSDN: Private as priority over Special
- * VS_FF_PRIVATEBUILD File *was not* built using standard release
- * procedures. If this value is given, the StringFileInfo block must
- * contain a PrivateBuild string.
- *
- * VS_FF_SPECIALBUILD File *was* built by the original company using
- * standard release procedures but is a variation of the standard
- * file of the same version number. If this value is given, the
- * StringFileInfo block must contain a SpecialBuild string.
- */
-
-#ifdef PNG_USER_PRIVATEBUILD /* From pnglibconf.h */
-#  define PNG_LIBPNG_BUILD_TYPE \
-       (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
-#else
-#  ifdef PNG_LIBPNG_SPECIALBUILD
-#    define PNG_LIBPNG_BUILD_TYPE \
-         (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
-#  else
-#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
-#  endif
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-
-/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/* Version information for C files, stored in png.c.  This had better match
- * the version above.
- */
-#define png_libpng_ver png_get_header_ver(NULL)
-
-/* This file is arranged in several sections:
- *
- * 1. [omitted]
- * 2. Any configuration options that can be specified by for the application
- *    code when it is built.  (Build time configuration is in pnglibconf.h)
- * 3. Type definitions (base types are defined in pngconf.h), structure
- *    definitions.
- * 4. Exported library functions.
- * 5. Simplified API.
- * 6. Implementation options.
- *
- * The library source code has additional files (principally pngpriv.h) that
- * allow configuration of the library.
- */
-
-/* Section 1: [omitted] */
-
-/* Section 2: run time configuration
- * See pnglibconf.h for build time configuration
- *
- * Run time configuration allows the application to choose between
- * implementations of certain arithmetic APIs.  The default is set
- * at build time and recorded in pnglibconf.h, but it is safe to
- * override these (and only these) settings.  Note that this won't
- * change what the library does, only application code, and the
- * settings can (and probably should) be made on a per-file basis
- * by setting the #defines before including png.h
- *
- * Use macros to read integers from PNG data or use the exported
- * functions?
- *   PNG_USE_READ_MACROS: use the macros (see below)  Note that
- *     the macros evaluate their argument multiple times.
- *   PNG_NO_USE_READ_MACROS: call the relevant library function.
- *
- * Use the alternative algorithm for compositing alpha samples that
- * does not use division?
- *   PNG_READ_COMPOSITE_NODIV_SUPPORTED: use the 'no division'
- *      algorithm.
- *   PNG_NO_READ_COMPOSITE_NODIV: use the 'division' algorithm.
- *
- * How to handle benign errors if PNG_ALLOW_BENIGN_ERRORS is
- * false?
- *   PNG_ALLOW_BENIGN_ERRORS: map calls to the benign error
- *      APIs to png_warning.
- * Otherwise the calls are mapped to png_error.
- */
-
-/* Section 3: type definitions, including structures and compile time
- * constants.
- * See pngconf.h for base types that vary by machine/system
- */
-
-/* This triggers a compiler error in png.c, if png.c and png.h
- * do not agree upon the version number.
- */
-typedef char* png_libpng_version_1_6_43;
-
-/* Basic control structions.  Read libpng-manual.txt or libpng.3 for more info.
- *
- * png_struct is the cache of information used while reading or writing a single
- * PNG file.  One of these is always required, although the simplified API
- * (below) hides the creation and destruction of it.
- */
-typedef struct png_struct_def png_struct;
-typedef const png_struct * png_const_structp;
-typedef png_struct * png_structp;
-typedef png_struct * * png_structpp;
-
-/* png_info contains information read from or to be written to a PNG file.  One
- * or more of these must exist while reading or creating a PNG file.  The
- * information is not used by libpng during read but is used to control what
- * gets written when a PNG file is created.  "png_get_" function calls read
- * information during read and "png_set_" functions calls write information
- * when creating a PNG.
- * been moved into a separate header file that is not accessible to
- * applications.  Read libpng-manual.txt or libpng.3 for more info.
- */
-typedef struct png_info_def png_info;
-typedef png_info * png_infop;
-typedef const png_info * png_const_infop;
-typedef png_info * * png_infopp;
-
-/* Types with names ending 'p' are pointer types.  The corresponding types with
- * names ending 'rp' are identical pointer types except that the pointer is
- * marked 'restrict', which means that it is the only pointer to the object
- * passed to the function.  Applications should not use the 'restrict' types;
- * it is always valid to pass 'p' to a pointer with a function argument of the
- * corresponding 'rp' type.  Different compilers have different rules with
- * regard to type matching in the presence of 'restrict'.  For backward
- * compatibility libpng callbacks never have 'restrict' in their parameters and,
- * consequentially, writing portable application code is extremely difficult if
- * an attempt is made to use 'restrict'.
- */
-typedef png_struct * PNG_RESTRICT png_structrp;
-typedef const png_struct * PNG_RESTRICT png_const_structrp;
-typedef png_info * PNG_RESTRICT png_inforp;
-typedef const png_info * PNG_RESTRICT png_const_inforp;
-
-/* Three color definitions.  The order of the red, green, and blue, (and the
- * exact size) is not important, although the size of the fields need to
- * be png_byte or png_uint_16 (as defined below).
- */
-typedef struct png_color_struct
-{
-   png_byte red;
-   png_byte green;
-   png_byte blue;
-} png_color;
-typedef png_color * png_colorp;
-typedef const png_color * png_const_colorp;
-typedef png_color * * png_colorpp;
-
-typedef struct png_color_16_struct
-{
-   png_byte index;    /* used for palette files */
-   png_uint_16 red;   /* for use in red green blue files */
-   png_uint_16 green;
-   png_uint_16 blue;
-   png_uint_16 gray;  /* for use in grayscale files */
-} png_color_16;
-typedef png_color_16 * png_color_16p;
-typedef const png_color_16 * png_const_color_16p;
-typedef png_color_16 * * png_color_16pp;
-
-typedef struct png_color_8_struct
-{
-   png_byte red;   /* for use in red green blue files */
-   png_byte green;
-   png_byte blue;
-   png_byte gray;  /* for use in grayscale files */
-   png_byte alpha; /* for alpha channel files */
-} png_color_8;
-typedef png_color_8 * png_color_8p;
-typedef const png_color_8 * png_const_color_8p;
-typedef png_color_8 * * png_color_8pp;
-
-/*
- * The following two structures are used for the in-core representation
- * of sPLT chunks.
- */
-typedef struct png_sPLT_entry_struct
-{
-   png_uint_16 red;
-   png_uint_16 green;
-   png_uint_16 blue;
-   png_uint_16 alpha;
-   png_uint_16 frequency;
-} png_sPLT_entry;
-typedef png_sPLT_entry * png_sPLT_entryp;
-typedef const png_sPLT_entry * png_const_sPLT_entryp;
-typedef png_sPLT_entry * * png_sPLT_entrypp;
-
-/*  When the depth of the sPLT palette is 8 bits, the color and alpha samples
- *  occupy the LSB of their respective members, and the MSB of each member
- *  is zero-filled.  The frequency member always occupies the full 16 bits.
- */
-
-typedef struct png_sPLT_struct
-{
-   png_charp name;           /* palette name */
-   png_byte depth;           /* depth of palette samples */
-   png_sPLT_entryp entries;  /* palette entries */
-   png_int_32 nentries;      /* number of palette entries */
-} png_sPLT_t;
-typedef png_sPLT_t * png_sPLT_tp;
-typedef const png_sPLT_t * png_const_sPLT_tp;
-typedef png_sPLT_t * * png_sPLT_tpp;
-
-#ifdef PNG_TEXT_SUPPORTED
-/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
- * and whether that contents is compressed or not.  The "key" field
- * points to a regular zero-terminated C string.  The "text" fields can be a
- * regular C string, an empty string, or a NULL pointer.
- * However, the structure returned by png_get_text() will always contain
- * the "text" field as a regular zero-terminated C string (possibly
- * empty), never a NULL pointer, so it can be safely used in printf() and
- * other string-handling functions.  Note that the "itxt_length", "lang", and
- * "lang_key" members of the structure only exist when the library is built
- * with iTXt chunk support.  Prior to libpng-1.4.0 the library was built by
- * default without iTXt support. Also note that when iTXt *is* supported,
- * the "lang" and "lang_key" fields contain NULL pointers when the
- * "compression" field contains * PNG_TEXT_COMPRESSION_NONE or
- * PNG_TEXT_COMPRESSION_zTXt. Note that the "compression value" is not the
- * same as what appears in the PNG tEXt/zTXt/iTXt chunk's "compression flag"
- * which is always 0 or 1, or its "compression method" which is always 0.
- */
-typedef struct png_text_struct
-{
-   int  compression;       /* compression value:
-                             -1: tEXt, none
-                              0: zTXt, deflate
-                              1: iTXt, none
-                              2: iTXt, deflate  */
-   png_charp key;          /* keyword, 1-79 character description of "text" */
-   png_charp text;         /* comment, may be an empty string (ie "")
-                              or a NULL pointer */
-   size_t text_length;     /* length of the text string */
-   size_t itxt_length;     /* length of the itxt string */
-   png_charp lang;         /* language code, 0-79 characters
-                              or a NULL pointer */
-   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
-                              chars or a NULL pointer */
-} png_text;
-typedef png_text * png_textp;
-typedef const png_text * png_const_textp;
-typedef png_text * * png_textpp;
-#endif
-
-/* Supported compression types for text in PNG files (tEXt, and zTXt).
- * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
-#define PNG_TEXT_COMPRESSION_NONE_WR -3
-#define PNG_TEXT_COMPRESSION_zTXt_WR -2
-#define PNG_TEXT_COMPRESSION_NONE    -1
-#define PNG_TEXT_COMPRESSION_zTXt     0
-#define PNG_ITXT_COMPRESSION_NONE     1
-#define PNG_ITXT_COMPRESSION_zTXt     2
-#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
-
-/* png_time is a way to hold the time in an machine independent way.
- * Two conversions are provided, both from time_t and struct tm.  There
- * is no portable way to convert to either of these structures, as far
- * as I know.  If you know of a portable way, send it to me.  As a side
- * note - PNG has always been Year 2000 compliant!
- */
-typedef struct png_time_struct
-{
-   png_uint_16 year; /* full year, as in, 1995 */
-   png_byte month;   /* month of year, 1 - 12 */
-   png_byte day;     /* day of month, 1 - 31 */
-   png_byte hour;    /* hour of day, 0 - 23 */
-   png_byte minute;  /* minute of hour, 0 - 59 */
-   png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
-} png_time;
-typedef png_time * png_timep;
-typedef const png_time * png_const_timep;
-typedef png_time * * png_timepp;
-
-#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) ||\
-   defined(PNG_USER_CHUNKS_SUPPORTED)
-/* png_unknown_chunk is a structure to hold queued chunks for which there is
- * no specific support.  The idea is that we can use this to queue
- * up private chunks for output even though the library doesn't actually
- * know about their semantics.
- *
- * The data in the structure is set by libpng on read and used on write.
- */
-typedef struct png_unknown_chunk_t
-{
-   png_byte name[5]; /* Textual chunk name with '\0' terminator */
-   png_byte *data;   /* Data, should not be modified on read! */
-   size_t size;
-
-   /* On write 'location' must be set using the flag values listed below.
-    * Notice that on read it is set by libpng however the values stored have
-    * more bits set than are listed below.  Always treat the value as a
-    * bitmask.  On write set only one bit - setting multiple bits may cause the
-    * chunk to be written in multiple places.
-    */
-   png_byte location; /* mode of operation at read time */
-}
-png_unknown_chunk;
-
-typedef png_unknown_chunk * png_unknown_chunkp;
-typedef const png_unknown_chunk * png_const_unknown_chunkp;
-typedef png_unknown_chunk * * png_unknown_chunkpp;
-#endif
-
-/* Flag values for the unknown chunk location byte. */
-#define PNG_HAVE_IHDR  0x01
-#define PNG_HAVE_PLTE  0x02
-#define PNG_AFTER_IDAT 0x08
-
-/* Maximum positive integer used in PNG is (2^31)-1 */
-#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL)
-#define PNG_UINT_32_MAX ((png_uint_32)(-1))
-#define PNG_SIZE_MAX ((size_t)(-1))
-
-/* These are constants for fixed point values encoded in the
- * PNG specification manner (x100000)
- */
-#define PNG_FP_1    100000
-#define PNG_FP_HALF  50000
-#define PNG_FP_MAX  ((png_fixed_point)0x7fffffffL)
-#define PNG_FP_MIN  (-PNG_FP_MAX)
-
-/* These describe the color_type field in png_info. */
-/* color type masks */
-#define PNG_COLOR_MASK_PALETTE    1
-#define PNG_COLOR_MASK_COLOR      2
-#define PNG_COLOR_MASK_ALPHA      4
-
-/* color types.  Note that not all combinations are legal */
-#define PNG_COLOR_TYPE_GRAY 0
-#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
-#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
-#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
-#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
-/* aliases */
-#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
-#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
-
-/* This is for compression type. PNG 1.0-1.2 only define the single type. */
-#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
-#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
-
-/* This is for filter type. PNG 1.0-1.2 only define the single type. */
-#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
-#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
-#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
-
-/* These are for the interlacing type.  These values should NOT be changed. */
-#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
-#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
-#define PNG_INTERLACE_LAST        2 /* Not a valid value */
-
-/* These are for the oFFs chunk.  These values should NOT be changed. */
-#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
-#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
-#define PNG_OFFSET_LAST           2 /* Not a valid value */
-
-/* These are for the pCAL chunk.  These values should NOT be changed. */
-#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
-#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
-#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
-#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
-#define PNG_EQUATION_LAST         4 /* Not a valid value */
-
-/* These are for the sCAL chunk.  These values should NOT be changed. */
-#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
-#define PNG_SCALE_METER           1 /* meters per pixel */
-#define PNG_SCALE_RADIAN          2 /* radians per pixel */
-#define PNG_SCALE_LAST            3 /* Not a valid value */
-
-/* These are for the pHYs chunk.  These values should NOT be changed. */
-#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
-#define PNG_RESOLUTION_METER      1 /* pixels/meter */
-#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
-
-/* These are for the sRGB chunk.  These values should NOT be changed. */
-#define PNG_sRGB_INTENT_PERCEPTUAL 0
-#define PNG_sRGB_INTENT_RELATIVE   1
-#define PNG_sRGB_INTENT_SATURATION 2
-#define PNG_sRGB_INTENT_ABSOLUTE   3
-#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
-
-/* This is for text chunks */
-#define PNG_KEYWORD_MAX_LENGTH     79
-
-/* Maximum number of entries in PLTE/sPLT/tRNS arrays */
-#define PNG_MAX_PALETTE_LENGTH    256
-
-/* These determine if an ancillary chunk's data has been successfully read
- * from the PNG header, or if the application has filled in the corresponding
- * data in the info_struct to be written into the output file.  The values
- * of the PNG_INFO_<chunk> defines should NOT be changed.
- */
-#define PNG_INFO_gAMA 0x0001U
-#define PNG_INFO_sBIT 0x0002U
-#define PNG_INFO_cHRM 0x0004U
-#define PNG_INFO_PLTE 0x0008U
-#define PNG_INFO_tRNS 0x0010U
-#define PNG_INFO_bKGD 0x0020U
-#define PNG_INFO_hIST 0x0040U
-#define PNG_INFO_pHYs 0x0080U
-#define PNG_INFO_oFFs 0x0100U
-#define PNG_INFO_tIME 0x0200U
-#define PNG_INFO_pCAL 0x0400U
-#define PNG_INFO_sRGB 0x0800U  /* GR-P, 0.96a */
-#define PNG_INFO_iCCP 0x1000U  /* ESR, 1.0.6 */
-#define PNG_INFO_sPLT 0x2000U  /* ESR, 1.0.6 */
-#define PNG_INFO_sCAL 0x4000U  /* ESR, 1.0.6 */
-#define PNG_INFO_IDAT 0x8000U  /* ESR, 1.0.6 */
-#define PNG_INFO_eXIf 0x10000U /* GR-P, 1.6.31 */
-
-/* This is used for the transformation routines, as some of them
- * change these values for the row.  It also should enable using
- * the routines for other purposes.
- */
-typedef struct png_row_info_struct
-{
-   png_uint_32 width;    /* width of row */
-   size_t rowbytes;      /* number of bytes in row */
-   png_byte color_type;  /* color type of row */
-   png_byte bit_depth;   /* bit depth of row */
-   png_byte channels;    /* number of channels (1, 2, 3, or 4) */
-   png_byte pixel_depth; /* bits per pixel (depth * channels) */
-} png_row_info;
-
-typedef png_row_info * png_row_infop;
-typedef png_row_info * * png_row_infopp;
-
-/* These are the function types for the I/O functions and for the functions
- * that allow the user to override the default I/O functions with his or her
- * own.  The png_error_ptr type should match that of user-supplied warning
- * and error functions, while the png_rw_ptr type should match that of the
- * user read/write data functions.  Note that the 'write' function must not
- * modify the buffer it is passed. The 'read' function, on the other hand, is
- * expected to return the read data in the buffer.
- */
-typedef PNG_CALLBACK(void, *png_error_ptr, (png_structp, png_const_charp));
-typedef PNG_CALLBACK(void, *png_rw_ptr, (png_structp, png_bytep, size_t));
-typedef PNG_CALLBACK(void, *png_flush_ptr, (png_structp));
-typedef PNG_CALLBACK(void, *png_read_status_ptr, (png_structp, png_uint_32,
-    int));
-typedef PNG_CALLBACK(void, *png_write_status_ptr, (png_structp, png_uint_32,
-    int));
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-typedef PNG_CALLBACK(void, *png_progressive_info_ptr, (png_structp, png_infop));
-typedef PNG_CALLBACK(void, *png_progressive_end_ptr, (png_structp, png_infop));
-
-/* The following callback receives png_uint_32 row_number, int pass for the
- * png_bytep data of the row.  When transforming an interlaced image the
- * row number is the row number within the sub-image of the interlace pass, so
- * the value will increase to the height of the sub-image (not the full image)
- * then reset to 0 for the next pass.
- *
- * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
- * find the output pixel (x,y) given an interlaced sub-image pixel
- * (row,col,pass).  (See below for these macros.)
- */
-typedef PNG_CALLBACK(void, *png_progressive_row_ptr, (png_structp, png_bytep,
-    png_uint_32, int));
-#endif
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-typedef PNG_CALLBACK(void, *png_user_transform_ptr, (png_structp, png_row_infop,
-    png_bytep));
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-typedef PNG_CALLBACK(int, *png_user_chunk_ptr, (png_structp,
-    png_unknown_chunkp));
-#endif
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-/* not used anywhere */
-/* typedef PNG_CALLBACK(void, *png_unknown_chunk_ptr, (png_structp)); */
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-/* This must match the function definition in <setjmp.h>, and the application
- * must include this before png.h to obtain the definition of jmp_buf.  The
- * function is required to be PNG_NORETURN, but this is not checked.  If the
- * function does return the application will crash via an abort() or similar
- * system level call.
- *
- * If you get a warning here while building the library you may need to make
- * changes to ensure that pnglibconf.h records the calling convention used by
- * your compiler.  This may be very difficult - try using a different compiler
- * to build the library!
- */
-PNG_FUNCTION(void, (PNGCAPI *png_longjmp_ptr), PNGARG((jmp_buf, int)), typedef);
-#endif
-
-/* Transform masks for the high-level interface */
-#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
-#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
-#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
-#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
-#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
-#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
-#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
-#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
-#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
-#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
-#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
-#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
-#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* write only */
-/* Added to libpng-1.2.34 */
-#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER
-#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */
-/* Added to libpng-1.4.0 */
-#define PNG_TRANSFORM_GRAY_TO_RGB   0x2000      /* read only */
-/* Added to libpng-1.5.4 */
-#define PNG_TRANSFORM_EXPAND_16     0x4000      /* read only */
-#if ~0U > 0xffffU /* or else this might break on a 16-bit machine */
-#define PNG_TRANSFORM_SCALE_16      0x8000      /* read only */
-#endif
-
-/* Flags for MNG supported features */
-#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
-#define PNG_FLAG_MNG_FILTER_64      0x04
-#define PNG_ALL_MNG_FEATURES        0x05
-
-/* NOTE: prior to 1.5 these functions had no 'API' style declaration,
- * this allowed the zlib default functions to be used on Windows
- * platforms.  In 1.5 the zlib default malloc (which just calls malloc and
- * ignores the first argument) should be completely compatible with the
- * following.
- */
-typedef PNG_CALLBACK(png_voidp, *png_malloc_ptr, (png_structp,
-    png_alloc_size_t));
-typedef PNG_CALLBACK(void, *png_free_ptr, (png_structp, png_voidp));
-
-/* Section 4: exported functions
- * Here are the function definitions most commonly used.  This is not
- * the place to find out how to use libpng.  See libpng-manual.txt for the
- * full explanation, see example.c for the summary.  This just provides
- * a simple one line description of the use of each function.
- *
- * The PNG_EXPORT() and PNG_EXPORTA() macros used below are defined in
- * pngconf.h and in the *.dfn files in the scripts directory.
- *
- *   PNG_EXPORT(ordinal, type, name, (args));
- *
- *       ordinal:    ordinal that is used while building
- *                   *.def files. The ordinal value is only
- *                   relevant when preprocessing png.h with
- *                   the *.dfn files for building symbol table
- *                   entries, and are removed by pngconf.h.
- *       type:       return type of the function
- *       name:       function name
- *       args:       function arguments, with types
- *
- * When we wish to append attributes to a function prototype we use
- * the PNG_EXPORTA() macro instead.
- *
- *   PNG_EXPORTA(ordinal, type, name, (args), attributes);
- *
- *       ordinal, type, name, and args: same as in PNG_EXPORT().
- *       attributes: function attributes
- */
-
-/* Returns the version number of the library */
-PNG_EXPORT(1, png_uint_32, png_access_version_number, (void));
-
-/* Tell lib we have already handled the first <num_bytes> magic bytes.
- * Handling more than 8 bytes from the beginning of the file is an error.
- */
-PNG_EXPORT(2, void, png_set_sig_bytes, (png_structrp png_ptr, int num_bytes));
-
-/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
- * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
- * signature, and non-zero otherwise.  Having num_to_check == 0 or
- * start > 7 will always fail (i.e. return non-zero).
- */
-PNG_EXPORT(3, int, png_sig_cmp, (png_const_bytep sig, size_t start,
-    size_t num_to_check));
-
-/* Simple signature checking function.  This is the same as calling
- * png_check_sig(sig, n) := (png_sig_cmp(sig, 0, n) == 0).
- */
-#define png_check_sig(sig, n) (png_sig_cmp((sig), 0, (n)) == 0) /* DEPRECATED */
-
-/* Allocate and initialize png_ptr struct for reading, and any other memory. */
-PNG_EXPORTA(4, png_structp, png_create_read_struct,
-    (png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn),
-    PNG_ALLOCATED);
-
-/* Allocate and initialize png_ptr struct for writing, and any other memory */
-PNG_EXPORTA(5, png_structp, png_create_write_struct,
-    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-    png_error_ptr warn_fn),
-    PNG_ALLOCATED);
-
-PNG_EXPORT(6, size_t, png_get_compression_buffer_size,
-    (png_const_structrp png_ptr));
-
-PNG_EXPORT(7, void, png_set_compression_buffer_size, (png_structrp png_ptr,
-    size_t size));
-
-/* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp
- * match up.
- */
-#ifdef PNG_SETJMP_SUPPORTED
-/* This function returns the jmp_buf built in to *png_ptr.  It must be
- * supplied with an appropriate 'longjmp' function to use on that jmp_buf
- * unless the default error function is overridden in which case NULL is
- * acceptable.  The size of the jmp_buf is checked against the actual size
- * allocated by the library - the call will return NULL on a mismatch
- * indicating an ABI mismatch.
- */
-PNG_EXPORT(8, jmp_buf*, png_set_longjmp_fn, (png_structrp png_ptr,
-    png_longjmp_ptr longjmp_fn, size_t jmp_buf_size));
-#  define png_jmpbuf(png_ptr) \
-      (*png_set_longjmp_fn((png_ptr), longjmp, (sizeof (jmp_buf))))
-#else
-#  define png_jmpbuf(png_ptr) \
-      (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP)
-#endif
-/* This function should be used by libpng applications in place of
- * longjmp(png_ptr->jmpbuf, val).  If longjmp_fn() has been set, it
- * will use it; otherwise it will call PNG_ABORT().  This function was
- * added in libpng-1.5.0.
- */
-PNG_EXPORTA(9, void, png_longjmp, (png_const_structrp png_ptr, int val),
-    PNG_NORETURN);
-
-#ifdef PNG_READ_SUPPORTED
-/* Reset the compression stream */
-PNG_EXPORTA(10, int, png_reset_zstream, (png_structrp png_ptr), PNG_DEPRECATED);
-#endif
-
-/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
-#ifdef PNG_USER_MEM_SUPPORTED
-PNG_EXPORTA(11, png_structp, png_create_read_struct_2,
-    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-    png_error_ptr warn_fn,
-    png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
-    PNG_ALLOCATED);
-PNG_EXPORTA(12, png_structp, png_create_write_struct_2,
-    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-    png_error_ptr warn_fn,
-    png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
-    PNG_ALLOCATED);
-#endif
-
-/* Write the PNG file signature. */
-PNG_EXPORT(13, void, png_write_sig, (png_structrp png_ptr));
-
-/* Write a PNG chunk - size, type, (optional) data, CRC. */
-PNG_EXPORT(14, void, png_write_chunk, (png_structrp png_ptr, png_const_bytep
-    chunk_name, png_const_bytep data, size_t length));
-
-/* Write the start of a PNG chunk - length and chunk name. */
-PNG_EXPORT(15, void, png_write_chunk_start, (png_structrp png_ptr,
-    png_const_bytep chunk_name, png_uint_32 length));
-
-/* Write the data of a PNG chunk started with png_write_chunk_start(). */
-PNG_EXPORT(16, void, png_write_chunk_data, (png_structrp png_ptr,
-    png_const_bytep data, size_t length));
-
-/* Finish a chunk started with png_write_chunk_start() (includes CRC). */
-PNG_EXPORT(17, void, png_write_chunk_end, (png_structrp png_ptr));
-
-/* Allocate and initialize the info structure */
-PNG_EXPORTA(18, png_infop, png_create_info_struct, (png_const_structrp png_ptr),
-    PNG_ALLOCATED);
-
-/* DEPRECATED: this function allowed init structures to be created using the
- * default allocation method (typically malloc).  Use is deprecated in 1.6.0 and
- * the API will be removed in the future.
- */
-PNG_EXPORTA(19, void, png_info_init_3, (png_infopp info_ptr,
-    size_t png_info_struct_size), PNG_DEPRECATED);
-
-/* Writes all the PNG information before the image. */
-PNG_EXPORT(20, void, png_write_info_before_PLTE,
-    (png_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(21, void, png_write_info,
-    (png_structrp png_ptr, png_const_inforp info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the information before the actual image data. */
-PNG_EXPORT(22, void, png_read_info,
-    (png_structrp png_ptr, png_inforp info_ptr));
-#endif
-
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-   /* Convert to a US string format: there is no localization support in this
-    * routine.  The original implementation used a 29 character buffer in
-    * png_struct, this will be removed in future versions.
-    */
-#if PNG_LIBPNG_VER < 10700
-/* To do: remove this from libpng17 (and from libpng17/png.c and pngstruct.h) */
-PNG_EXPORTA(23, png_const_charp, png_convert_to_rfc1123, (png_structrp png_ptr,
-    png_const_timep ptime),PNG_DEPRECATED);
-#endif
-PNG_EXPORT(241, int, png_convert_to_rfc1123_buffer, (char out[29],
-    png_const_timep ptime));
-#endif
-
-#ifdef PNG_CONVERT_tIME_SUPPORTED
-/* Convert from a struct tm to png_time */
-PNG_EXPORT(24, void, png_convert_from_struct_tm, (png_timep ptime,
-    const struct tm * ttime));
-
-/* Convert from time_t to png_time.  Uses gmtime() */
-PNG_EXPORT(25, void, png_convert_from_time_t, (png_timep ptime, time_t ttime));
-#endif /* CONVERT_tIME */
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
-PNG_EXPORT(26, void, png_set_expand, (png_structrp png_ptr));
-PNG_EXPORT(27, void, png_set_expand_gray_1_2_4_to_8, (png_structrp png_ptr));
-PNG_EXPORT(28, void, png_set_palette_to_rgb, (png_structrp png_ptr));
-PNG_EXPORT(29, void, png_set_tRNS_to_alpha, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-/* Expand to 16-bit channels, forces conversion of palette to RGB and expansion
- * of a tRNS chunk if present.
- */
-PNG_EXPORT(221, void, png_set_expand_16, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-/* Use blue, green, red order for pixels. */
-PNG_EXPORT(30, void, png_set_bgr, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-/* Expand the grayscale to 24-bit RGB if necessary. */
-PNG_EXPORT(31, void, png_set_gray_to_rgb, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-/* Reduce RGB to grayscale. */
-#define PNG_ERROR_ACTION_NONE  1
-#define PNG_ERROR_ACTION_WARN  2
-#define PNG_ERROR_ACTION_ERROR 3
-#define PNG_RGB_TO_GRAY_DEFAULT (-1)/*for red/green coefficients*/
-
-PNG_FP_EXPORT(32, void, png_set_rgb_to_gray, (png_structrp png_ptr,
-    int error_action, double red, double green))
-PNG_FIXED_EXPORT(33, void, png_set_rgb_to_gray_fixed, (png_structrp png_ptr,
-    int error_action, png_fixed_point red, png_fixed_point green))
-
-PNG_EXPORT(34, png_byte, png_get_rgb_to_gray_status, (png_const_structrp
-    png_ptr));
-#endif
-
-#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-PNG_EXPORT(35, void, png_build_grayscale_palette, (int bit_depth,
-    png_colorp palette));
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-/* How the alpha channel is interpreted - this affects how the color channels
- * of a PNG file are returned to the calling application when an alpha channel,
- * or a tRNS chunk in a palette file, is present.
- *
- * This has no effect on the way pixels are written into a PNG output
- * datastream. The color samples in a PNG datastream are never premultiplied
- * with the alpha samples.
- *
- * The default is to return data according to the PNG specification: the alpha
- * channel is a linear measure of the contribution of the pixel to the
- * corresponding composited pixel, and the color channels are unassociated
- * (not premultiplied).  The gamma encoded color channels must be scaled
- * according to the contribution and to do this it is necessary to undo
- * the encoding, scale the color values, perform the composition and re-encode
- * the values.  This is the 'PNG' mode.
- *
- * The alternative is to 'associate' the alpha with the color information by
- * storing color channel values that have been scaled by the alpha.
- * image.  These are the 'STANDARD', 'ASSOCIATED' or 'PREMULTIPLIED' modes
- * (the latter being the two common names for associated alpha color channels).
- *
- * For the 'OPTIMIZED' mode, a pixel is treated as opaque only if the alpha
- * value is equal to the maximum value.
- *
- * The final choice is to gamma encode the alpha channel as well.  This is
- * broken because, in practice, no implementation that uses this choice
- * correctly undoes the encoding before handling alpha composition.  Use this
- * choice only if other serious errors in the software or hardware you use
- * mandate it; the typical serious error is for dark halos to appear around
- * opaque areas of the composited PNG image because of arithmetic overflow.
- *
- * The API function png_set_alpha_mode specifies which of these choices to use
- * with an enumerated 'mode' value and the gamma of the required output:
- */
-#define PNG_ALPHA_PNG           0 /* according to the PNG standard */
-#define PNG_ALPHA_STANDARD      1 /* according to Porter/Duff */
-#define PNG_ALPHA_ASSOCIATED    1 /* as above; this is the normal practice */
-#define PNG_ALPHA_PREMULTIPLIED 1 /* as above */
-#define PNG_ALPHA_OPTIMIZED     2 /* 'PNG' for opaque pixels, else 'STANDARD' */
-#define PNG_ALPHA_BROKEN        3 /* the alpha channel is gamma encoded */
-
-PNG_FP_EXPORT(227, void, png_set_alpha_mode, (png_structrp png_ptr, int mode,
-    double output_gamma))
-PNG_FIXED_EXPORT(228, void, png_set_alpha_mode_fixed, (png_structrp png_ptr,
-    int mode, png_fixed_point output_gamma))
-#endif
-
-#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-/* The output_gamma value is a screen gamma in libpng terminology: it expresses
- * how to decode the output values, not how they are encoded.
- */
-#define PNG_DEFAULT_sRGB -1       /* sRGB gamma and color space */
-#define PNG_GAMMA_MAC_18 -2       /* Old Mac '1.8' gamma and color space */
-#define PNG_GAMMA_sRGB   220000   /* Television standards--matches sRGB gamma */
-#define PNG_GAMMA_LINEAR PNG_FP_1 /* Linear */
-#endif
-
-/* The following are examples of calls to png_set_alpha_mode to achieve the
- * required overall gamma correction and, where necessary, alpha
- * premultiplication.
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
- *    This is the default libpng handling of the alpha channel - it is not
- *    pre-multiplied into the color components.  In addition the call states
- *    that the output is for a sRGB system and causes all PNG files without gAMA
- *    chunks to be assumed to be encoded using sRGB.
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
- *    In this case the output is assumed to be something like an sRGB conformant
- *    display preceded by a power-law lookup table of power 1.45.  This is how
- *    early Mac systems behaved.
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_GAMMA_LINEAR);
- *    This is the classic Jim Blinn approach and will work in academic
- *    environments where everything is done by the book.  It has the shortcoming
- *    of assuming that input PNG data with no gamma information is linear - this
- *    is unlikely to be correct unless the PNG files where generated locally.
- *    Most of the time the output precision will be so low as to show
- *    significant banding in dark areas of the image.
- *
- * png_set_expand_16(pp);
- * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_DEFAULT_sRGB);
- *    This is a somewhat more realistic Jim Blinn inspired approach.  PNG files
- *    are assumed to have the sRGB encoding if not marked with a gamma value and
- *    the output is always 16 bits per component.  This permits accurate scaling
- *    and processing of the data.  If you know that your input PNG files were
- *    generated locally you might need to replace PNG_DEFAULT_sRGB with the
- *    correct value for your system.
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_OPTIMIZED, PNG_DEFAULT_sRGB);
- *    If you just need to composite the PNG image onto an existing background
- *    and if you control the code that does this you can use the optimization
- *    setting.  In this case you just copy completely opaque pixels to the
- *    output.  For pixels that are not completely transparent (you just skip
- *    those) you do the composition math using png_composite or png_composite_16
- *    below then encode the resultant 8-bit or 16-bit values to match the output
- *    encoding.
- *
- * Other cases
- *    If neither the PNG nor the standard linear encoding work for you because
- *    of the software or hardware you use then you have a big problem.  The PNG
- *    case will probably result in halos around the image.  The linear encoding
- *    will probably result in a washed out, too bright, image (it's actually too
- *    contrasty.)  Try the ALPHA_OPTIMIZED mode above - this will probably
- *    substantially reduce the halos.  Alternatively try:
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_BROKEN, PNG_DEFAULT_sRGB);
- *    This option will also reduce the halos, but there will be slight dark
- *    halos round the opaque parts of the image where the background is light.
- *    In the OPTIMIZED mode the halos will be light halos where the background
- *    is dark.  Take your pick - the halos are unavoidable unless you can get
- *    your hardware/software fixed!  (The OPTIMIZED approach is slightly
- *    faster.)
- *
- * When the default gamma of PNG files doesn't match the output gamma.
- *    If you have PNG files with no gamma information png_set_alpha_mode allows
- *    you to provide a default gamma, but it also sets the output gamma to the
- *    matching value.  If you know your PNG files have a gamma that doesn't
- *    match the output you can take advantage of the fact that
- *    png_set_alpha_mode always sets the output gamma but only sets the PNG
- *    default if it is not already set:
- *
- * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
- * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
- *    The first call sets both the default and the output gamma values, the
- *    second call overrides the output gamma without changing the default.  This
- *    is easier than achieving the same effect with png_set_gamma.  You must use
- *    PNG_ALPHA_PNG for the first call - internal checking in png_set_alpha will
- *    fire if more than one call to png_set_alpha_mode and png_set_background is
- *    made in the same read operation, however multiple calls with PNG_ALPHA_PNG
- *    are ignored.
- */
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-PNG_EXPORT(36, void, png_set_strip_alpha, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
-PNG_EXPORT(37, void, png_set_swap_alpha, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
-PNG_EXPORT(38, void, png_set_invert_alpha, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
-/* Add a filler byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */
-PNG_EXPORT(39, void, png_set_filler, (png_structrp png_ptr, png_uint_32 filler,
-    int flags));
-/* The values of the PNG_FILLER_ defines should NOT be changed */
-#  define PNG_FILLER_BEFORE 0
-#  define PNG_FILLER_AFTER 1
-/* Add an alpha byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */
-PNG_EXPORT(40, void, png_set_add_alpha, (png_structrp png_ptr,
-    png_uint_32 filler, int flags));
-#endif /* READ_FILLER || WRITE_FILLER */
-
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-/* Swap bytes in 16-bit depth files. */
-PNG_EXPORT(41, void, png_set_swap, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
-/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
-PNG_EXPORT(42, void, png_set_packing, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
-    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-/* Swap packing order of pixels in bytes. */
-PNG_EXPORT(43, void, png_set_packswap, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
-/* Converts files to legal bit depths. */
-PNG_EXPORT(44, void, png_set_shift, (png_structrp png_ptr, png_const_color_8p
-    true_bits));
-#endif
-
-#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
-    defined(PNG_WRITE_INTERLACING_SUPPORTED)
-/* Have the code handle the interlacing.  Returns the number of passes.
- * MUST be called before png_read_update_info or png_start_read_image,
- * otherwise it will not have the desired effect.  Note that it is still
- * necessary to call png_read_row or png_read_rows png_get_image_height
- * times for each pass.
-*/
-PNG_EXPORT(45, int, png_set_interlace_handling, (png_structrp png_ptr));
-#endif
-
-#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
-/* Invert monochrome files */
-PNG_EXPORT(46, void, png_set_invert_mono, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-/* Handle alpha and tRNS by replacing with a background color.  Prior to
- * libpng-1.5.4 this API must not be called before the PNG file header has been
- * read.  Doing so will result in unexpected behavior and possible warnings or
- * errors if the PNG file contains a bKGD chunk.
- */
-PNG_FP_EXPORT(47, void, png_set_background, (png_structrp png_ptr,
-    png_const_color_16p background_color, int background_gamma_code,
-    int need_expand, double background_gamma))
-PNG_FIXED_EXPORT(215, void, png_set_background_fixed, (png_structrp png_ptr,
-    png_const_color_16p background_color, int background_gamma_code,
-    int need_expand, png_fixed_point background_gamma))
-#endif
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-#  define PNG_BACKGROUND_GAMMA_UNKNOWN 0
-#  define PNG_BACKGROUND_GAMMA_SCREEN  1
-#  define PNG_BACKGROUND_GAMMA_FILE    2
-#  define PNG_BACKGROUND_GAMMA_UNIQUE  3
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-/* Scale a 16-bit depth file down to 8-bit, accurately. */
-PNG_EXPORT(229, void, png_set_scale_16, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-#define PNG_READ_16_TO_8_SUPPORTED /* Name prior to 1.5.4 */
-/* Strip the second byte of information from a 16-bit depth file. */
-PNG_EXPORT(48, void, png_set_strip_16, (png_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-/* Turn on quantizing, and reduce the palette to the number of colors
- * available.
- */
-PNG_EXPORT(49, void, png_set_quantize, (png_structrp png_ptr,
-    png_colorp palette, int num_palette, int maximum_colors,
-    png_const_uint_16p histogram, int full_quantize));
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-/* The threshold on gamma processing is configurable but hard-wired into the
- * library.  The following is the floating point variant.
- */
-#define PNG_GAMMA_THRESHOLD (PNG_GAMMA_THRESHOLD_FIXED*.00001)
-
-/* Handle gamma correction. Screen_gamma=(display_exponent).
- * NOTE: this API simply sets the screen and file gamma values. It will
- * therefore override the value for gamma in a PNG file if it is called after
- * the file header has been read - use with care  - call before reading the PNG
- * file for best results!
- *
- * These routines accept the same gamma values as png_set_alpha_mode (described
- * above).  The PNG_GAMMA_ defines and PNG_DEFAULT_sRGB can be passed to either
- * API (floating point or fixed.)  Notice, however, that the 'file_gamma' value
- * is the inverse of a 'screen gamma' value.
- */
-PNG_FP_EXPORT(50, void, png_set_gamma, (png_structrp png_ptr,
-    double screen_gamma, double override_file_gamma))
-PNG_FIXED_EXPORT(208, void, png_set_gamma_fixed, (png_structrp png_ptr,
-    png_fixed_point screen_gamma, png_fixed_point override_file_gamma))
-#endif
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-/* Set how many lines between output flushes - 0 for no flushing */
-PNG_EXPORT(51, void, png_set_flush, (png_structrp png_ptr, int nrows));
-/* Flush the current PNG output buffer */
-PNG_EXPORT(52, void, png_write_flush, (png_structrp png_ptr));
-#endif
-
-/* Optional update palette with requested transformations */
-PNG_EXPORT(53, void, png_start_read_image, (png_structrp png_ptr));
-
-/* Optional call to update the users info structure */
-PNG_EXPORT(54, void, png_read_update_info, (png_structrp png_ptr,
-    png_inforp info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read one or more rows of image data. */
-PNG_EXPORT(55, void, png_read_rows, (png_structrp png_ptr, png_bytepp row,
-    png_bytepp display_row, png_uint_32 num_rows));
-#endif
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read a row of data. */
-PNG_EXPORT(56, void, png_read_row, (png_structrp png_ptr, png_bytep row,
-    png_bytep display_row));
-#endif
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the whole image into memory at once. */
-PNG_EXPORT(57, void, png_read_image, (png_structrp png_ptr, png_bytepp image));
-#endif
-
-/* Write a row of image data */
-PNG_EXPORT(58, void, png_write_row, (png_structrp png_ptr,
-    png_const_bytep row));
-
-/* Write a few rows of image data: (*row) is not written; however, the type
- * is declared as writeable to maintain compatibility with previous versions
- * of libpng and to allow the 'display_row' array from read_rows to be passed
- * unchanged to write_rows.
- */
-PNG_EXPORT(59, void, png_write_rows, (png_structrp png_ptr, png_bytepp row,
-    png_uint_32 num_rows));
-
-/* Write the image data */
-PNG_EXPORT(60, void, png_write_image, (png_structrp png_ptr, png_bytepp image));
-
-/* Write the end of the PNG file. */
-PNG_EXPORT(61, void, png_write_end, (png_structrp png_ptr,
-    png_inforp info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the end of the PNG file. */
-PNG_EXPORT(62, void, png_read_end, (png_structrp png_ptr, png_inforp info_ptr));
-#endif
-
-/* Free any memory associated with the png_info_struct */
-PNG_EXPORT(63, void, png_destroy_info_struct, (png_const_structrp png_ptr,
-    png_infopp info_ptr_ptr));
-
-/* Free any memory associated with the png_struct and the png_info_structs */
-PNG_EXPORT(64, void, png_destroy_read_struct, (png_structpp png_ptr_ptr,
-    png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
-
-/* Free any memory associated with the png_struct and the png_info_structs */
-PNG_EXPORT(65, void, png_destroy_write_struct, (png_structpp png_ptr_ptr,
-    png_infopp info_ptr_ptr));
-
-/* Set the libpng method of handling chunk CRC errors */
-PNG_EXPORT(66, void, png_set_crc_action, (png_structrp png_ptr, int crit_action,
-    int ancil_action));
-
-/* Values for png_set_crc_action() say how to handle CRC errors in
- * ancillary and critical chunks, and whether to use the data contained
- * therein.  Note that it is impossible to "discard" data in a critical
- * chunk.  For versions prior to 0.90, the action was always error/quit,
- * whereas in version 0.90 and later, the action for CRC errors in ancillary
- * chunks is warn/discard.  These values should NOT be changed.
- *
- *      value                       action:critical     action:ancillary
- */
-#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
-#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
-#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
-#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
-#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
-#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
-
-#ifdef PNG_WRITE_SUPPORTED
-/* These functions give the user control over the scan-line filtering in
- * libpng and the compression methods used by zlib.  These functions are
- * mainly useful for testing, as the defaults should work with most users.
- * Those users who are tight on memory or want faster performance at the
- * expense of compression can modify them.  See the compression library
- * header file (zlib.h) for an explanation of the compression functions.
- */
-
-/* Set the filtering method(s) used by libpng.  Currently, the only valid
- * value for "method" is 0.
- */
-PNG_EXPORT(67, void, png_set_filter, (png_structrp png_ptr, int method,
-    int filters));
-#endif /* WRITE */
-
-/* Flags for png_set_filter() to say which filters to use.  The flags
- * are chosen so that they don't conflict with real filter types
- * below, in case they are supplied instead of the #defined constants.
- * These values should NOT be changed.
- */
-#define PNG_NO_FILTERS     0x00
-#define PNG_FILTER_NONE    0x08
-#define PNG_FILTER_SUB     0x10
-#define PNG_FILTER_UP      0x20
-#define PNG_FILTER_AVG     0x40
-#define PNG_FILTER_PAETH   0x80
-#define PNG_FAST_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP)
-#define PNG_ALL_FILTERS (PNG_FAST_FILTERS | PNG_FILTER_AVG | PNG_FILTER_PAETH)
-
-/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
- * These defines should NOT be changed.
- */
-#define PNG_FILTER_VALUE_NONE  0
-#define PNG_FILTER_VALUE_SUB   1
-#define PNG_FILTER_VALUE_UP    2
-#define PNG_FILTER_VALUE_AVG   3
-#define PNG_FILTER_VALUE_PAETH 4
-#define PNG_FILTER_VALUE_LAST  5
-
-#ifdef PNG_WRITE_SUPPORTED
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */
-PNG_FP_EXPORT(68, void, png_set_filter_heuristics, (png_structrp png_ptr,
-    int heuristic_method, int num_weights, png_const_doublep filter_weights,
-    png_const_doublep filter_costs))
-PNG_FIXED_EXPORT(209, void, png_set_filter_heuristics_fixed,
-    (png_structrp png_ptr, int heuristic_method, int num_weights,
-    png_const_fixed_point_p filter_weights,
-    png_const_fixed_point_p filter_costs))
-#endif /* WRITE_WEIGHTED_FILTER */
-
-/* The following are no longer used and will be removed from libpng-1.7: */
-#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
-#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
-#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
-#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
-
-/* Set the library compression level.  Currently, valid values range from
- * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
- * (0 - no compression, 9 - "maximal" compression).  Note that tests have
- * shown that zlib compression levels 3-6 usually perform as well as level 9
- * for PNG images, and do considerably fewer calculations.  In the future,
- * these values may not correspond directly to the zlib compression levels.
- */
-#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
-PNG_EXPORT(69, void, png_set_compression_level, (png_structrp png_ptr,
-    int level));
-
-PNG_EXPORT(70, void, png_set_compression_mem_level, (png_structrp png_ptr,
-    int mem_level));
-
-PNG_EXPORT(71, void, png_set_compression_strategy, (png_structrp png_ptr,
-    int strategy));
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-PNG_EXPORT(72, void, png_set_compression_window_bits, (png_structrp png_ptr,
-    int window_bits));
-
-PNG_EXPORT(73, void, png_set_compression_method, (png_structrp png_ptr,
-    int method));
-#endif /* WRITE_CUSTOMIZE_COMPRESSION */
-
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-/* Also set zlib parameters for compressing non-IDAT chunks */
-PNG_EXPORT(222, void, png_set_text_compression_level, (png_structrp png_ptr,
-    int level));
-
-PNG_EXPORT(223, void, png_set_text_compression_mem_level, (png_structrp png_ptr,
-    int mem_level));
-
-PNG_EXPORT(224, void, png_set_text_compression_strategy, (png_structrp png_ptr,
-    int strategy));
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-PNG_EXPORT(225, void, png_set_text_compression_window_bits,
-    (png_structrp png_ptr, int window_bits));
-
-PNG_EXPORT(226, void, png_set_text_compression_method, (png_structrp png_ptr,
-    int method));
-#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */
-#endif /* WRITE */
-
-/* These next functions are called for input/output, memory, and error
- * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
- * and call standard C I/O routines such as fread(), fwrite(), and
- * fprintf().  These functions can be made to use other I/O routines
- * at run time for those applications that need to handle I/O in a
- * different manner by calling png_set_???_fn().  See libpng-manual.txt for
- * more information.
- */
-
-#ifdef PNG_STDIO_SUPPORTED
-/* Initialize the input/output for the PNG file to the default functions. */
-PNG_EXPORT(74, void, png_init_io, (png_structrp png_ptr, png_FILE_p fp));
-#endif
-
-/* Replace the (error and abort), and warning functions with user
- * supplied functions.  If no messages are to be printed you must still
- * write and use replacement functions. The replacement error_fn should
- * still do a longjmp to the last setjmp location if you are using this
- * method of error handling.  If error_fn or warning_fn is NULL, the
- * default function will be used.
- */
-
-PNG_EXPORT(75, void, png_set_error_fn, (png_structrp png_ptr,
-    png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn));
-
-/* Return the user pointer associated with the error functions */
-PNG_EXPORT(76, png_voidp, png_get_error_ptr, (png_const_structrp png_ptr));
-
-/* Replace the default data output functions with a user supplied one(s).
- * If buffered output is not used, then output_flush_fn can be set to NULL.
- * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
- * output_flush_fn will be ignored (and thus can be NULL).
- * It is probably a mistake to use NULL for output_flush_fn if
- * write_data_fn is not also NULL unless you have built libpng with
- * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's
- * default flush function, which uses the standard *FILE structure, will
- * be used.
- */
-PNG_EXPORT(77, void, png_set_write_fn, (png_structrp png_ptr, png_voidp io_ptr,
-    png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
-
-/* Replace the default data input function with a user supplied one. */
-PNG_EXPORT(78, void, png_set_read_fn, (png_structrp png_ptr, png_voidp io_ptr,
-    png_rw_ptr read_data_fn));
-
-/* Return the user pointer associated with the I/O functions */
-PNG_EXPORT(79, png_voidp, png_get_io_ptr, (png_const_structrp png_ptr));
-
-PNG_EXPORT(80, void, png_set_read_status_fn, (png_structrp png_ptr,
-    png_read_status_ptr read_row_fn));
-
-PNG_EXPORT(81, void, png_set_write_status_fn, (png_structrp png_ptr,
-    png_write_status_ptr write_row_fn));
-
-#ifdef PNG_USER_MEM_SUPPORTED
-/* Replace the default memory allocation functions with user supplied one(s). */
-PNG_EXPORT(82, void, png_set_mem_fn, (png_structrp png_ptr, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn));
-/* Return the user pointer associated with the memory functions */
-PNG_EXPORT(83, png_voidp, png_get_mem_ptr, (png_const_structrp png_ptr));
-#endif
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-PNG_EXPORT(84, void, png_set_read_user_transform_fn, (png_structrp png_ptr,
-    png_user_transform_ptr read_user_transform_fn));
-#endif
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-PNG_EXPORT(85, void, png_set_write_user_transform_fn, (png_structrp png_ptr,
-    png_user_transform_ptr write_user_transform_fn));
-#endif
-
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-PNG_EXPORT(86, void, png_set_user_transform_info, (png_structrp png_ptr,
-    png_voidp user_transform_ptr, int user_transform_depth,
-    int user_transform_channels));
-/* Return the user pointer associated with the user transform functions */
-PNG_EXPORT(87, png_voidp, png_get_user_transform_ptr,
-    (png_const_structrp png_ptr));
-#endif
-
-#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED
-/* Return information about the row currently being processed.  Note that these
- * APIs do not fail but will return unexpected results if called outside a user
- * transform callback.  Also note that when transforming an interlaced image the
- * row number is the row number within the sub-image of the interlace pass, so
- * the value will increase to the height of the sub-image (not the full image)
- * then reset to 0 for the next pass.
- *
- * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
- * find the output pixel (x,y) given an interlaced sub-image pixel
- * (row,col,pass).  (See below for these macros.)
- */
-PNG_EXPORT(217, png_uint_32, png_get_current_row_number, (png_const_structrp));
-PNG_EXPORT(218, png_byte, png_get_current_pass_number, (png_const_structrp));
-#endif
-
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-/* This callback is called only for *unknown* chunks.  If
- * PNG_HANDLE_AS_UNKNOWN_SUPPORTED is set then it is possible to set known
- * chunks to be treated as unknown, however in this case the callback must do
- * any processing required by the chunk (e.g. by calling the appropriate
- * png_set_ APIs.)
- *
- * There is no write support - on write, by default, all the chunks in the
- * 'unknown' list are written in the specified position.
- *
- * The integer return from the callback function is interpreted thus:
- *
- * negative: An error occurred; png_chunk_error will be called.
- *     zero: The chunk was not handled, the chunk will be saved. A critical
- *           chunk will cause an error at this point unless it is to be saved.
- * positive: The chunk was handled, libpng will ignore/discard it.
- *
- * See "INTERACTION WITH USER CHUNK CALLBACKS" below for important notes about
- * how this behavior will change in libpng 1.7
- */
-PNG_EXPORT(88, void, png_set_read_user_chunk_fn, (png_structrp png_ptr,
-    png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-PNG_EXPORT(89, png_voidp, png_get_user_chunk_ptr, (png_const_structrp png_ptr));
-#endif
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-/* Sets the function callbacks for the push reader, and a pointer to a
- * user-defined structure available to the callback functions.
- */
-PNG_EXPORT(90, void, png_set_progressive_read_fn, (png_structrp png_ptr,
-    png_voidp progressive_ptr, png_progressive_info_ptr info_fn,
-    png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn));
-
-/* Returns the user pointer associated with the push read functions */
-PNG_EXPORT(91, png_voidp, png_get_progressive_ptr,
-    (png_const_structrp png_ptr));
-
-/* Function to be called when data becomes available */
-PNG_EXPORT(92, void, png_process_data, (png_structrp png_ptr,
-    png_inforp info_ptr, png_bytep buffer, size_t buffer_size));
-
-/* A function which may be called *only* within png_process_data to stop the
- * processing of any more data.  The function returns the number of bytes
- * remaining, excluding any that libpng has cached internally.  A subsequent
- * call to png_process_data must supply these bytes again.  If the argument
- * 'save' is set to true the routine will first save all the pending data and
- * will always return 0.
- */
-PNG_EXPORT(219, size_t, png_process_data_pause, (png_structrp, int save));
-
-/* A function which may be called *only* outside (after) a call to
- * png_process_data.  It returns the number of bytes of data to skip in the
- * input.  Normally it will return 0, but if it returns a non-zero value the
- * application must skip than number of bytes of input data and pass the
- * following data to the next call to png_process_data.
- */
-PNG_EXPORT(220, png_uint_32, png_process_data_skip, (png_structrp));
-
-/* Function that combines rows.  'new_row' is a flag that should come from
- * the callback and be non-NULL if anything needs to be done; the library
- * stores its own version of the new data internally and ignores the passed
- * in value.
- */
-PNG_EXPORT(93, void, png_progressive_combine_row, (png_const_structrp png_ptr,
-    png_bytep old_row, png_const_bytep new_row));
-#endif /* PROGRESSIVE_READ */
-
-PNG_EXPORTA(94, png_voidp, png_malloc, (png_const_structrp png_ptr,
-    png_alloc_size_t size), PNG_ALLOCATED);
-/* Added at libpng version 1.4.0 */
-PNG_EXPORTA(95, png_voidp, png_calloc, (png_const_structrp png_ptr,
-    png_alloc_size_t size), PNG_ALLOCATED);
-
-/* Added at libpng version 1.2.4 */
-PNG_EXPORTA(96, png_voidp, png_malloc_warn, (png_const_structrp png_ptr,
-    png_alloc_size_t size), PNG_ALLOCATED);
-
-/* Frees a pointer allocated by png_malloc() */
-PNG_EXPORT(97, void, png_free, (png_const_structrp png_ptr, png_voidp ptr));
-
-/* Free data that was allocated internally */
-PNG_EXPORT(98, void, png_free_data, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 free_me, int num));
-
-/* Reassign the responsibility for freeing existing data, whether allocated
- * by libpng or by the application; this works on the png_info structure passed
- * in, without changing the state for other png_info structures.
- */
-PNG_EXPORT(99, void, png_data_freer, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int freer, png_uint_32 mask));
-
-/* Assignments for png_data_freer */
-#define PNG_DESTROY_WILL_FREE_DATA 1
-#define PNG_SET_WILL_FREE_DATA 1
-#define PNG_USER_WILL_FREE_DATA 2
-/* Flags for png_ptr->free_me and info_ptr->free_me */
-#define PNG_FREE_HIST 0x0008U
-#define PNG_FREE_ICCP 0x0010U
-#define PNG_FREE_SPLT 0x0020U
-#define PNG_FREE_ROWS 0x0040U
-#define PNG_FREE_PCAL 0x0080U
-#define PNG_FREE_SCAL 0x0100U
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-#  define PNG_FREE_UNKN 0x0200U
-#endif
-/*      PNG_FREE_LIST 0x0400U   removed in 1.6.0 because it is ignored */
-#define PNG_FREE_PLTE 0x1000U
-#define PNG_FREE_TRNS 0x2000U
-#define PNG_FREE_TEXT 0x4000U
-#define PNG_FREE_EXIF 0x8000U /* Added at libpng-1.6.31 */
-#define PNG_FREE_ALL  0xffffU
-#define PNG_FREE_MUL  0x4220U /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
-
-#ifdef PNG_USER_MEM_SUPPORTED
-PNG_EXPORTA(100, png_voidp, png_malloc_default, (png_const_structrp png_ptr,
-    png_alloc_size_t size), PNG_ALLOCATED PNG_DEPRECATED);
-PNG_EXPORTA(101, void, png_free_default, (png_const_structrp png_ptr,
-    png_voidp ptr), PNG_DEPRECATED);
-#endif
-
-#ifdef PNG_ERROR_TEXT_SUPPORTED
-/* Fatal error in PNG image of libpng - can't continue */
-PNG_EXPORTA(102, void, png_error, (png_const_structrp png_ptr,
-    png_const_charp error_message), PNG_NORETURN);
-
-/* The same, but the chunk name is prepended to the error string. */
-PNG_EXPORTA(103, void, png_chunk_error, (png_const_structrp png_ptr,
-    png_const_charp error_message), PNG_NORETURN);
-
-#else
-/* Fatal error in PNG image of libpng - can't continue */
-PNG_EXPORTA(104, void, png_err, (png_const_structrp png_ptr), PNG_NORETURN);
-#  define png_error(s1,s2) png_err(s1)
-#  define png_chunk_error(s1,s2) png_err(s1)
-#endif
-
-#ifdef PNG_WARNINGS_SUPPORTED
-/* Non-fatal error in libpng.  Can continue, but may have a problem. */
-PNG_EXPORT(105, void, png_warning, (png_const_structrp png_ptr,
-    png_const_charp warning_message));
-
-/* Non-fatal error in libpng, chunk name is prepended to message. */
-PNG_EXPORT(106, void, png_chunk_warning, (png_const_structrp png_ptr,
-    png_const_charp warning_message));
-#else
-#  define png_warning(s1,s2) ((void)(s1))
-#  define png_chunk_warning(s1,s2) ((void)(s1))
-#endif
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-/* Benign error in libpng.  Can continue, but may have a problem.
- * User can choose whether to handle as a fatal error or as a warning. */
-PNG_EXPORT(107, void, png_benign_error, (png_const_structrp png_ptr,
-    png_const_charp warning_message));
-
-#ifdef PNG_READ_SUPPORTED
-/* Same, chunk name is prepended to message (only during read) */
-PNG_EXPORT(108, void, png_chunk_benign_error, (png_const_structrp png_ptr,
-    png_const_charp warning_message));
-#endif
-
-PNG_EXPORT(109, void, png_set_benign_errors,
-    (png_structrp png_ptr, int allowed));
-#else
-#  ifdef PNG_ALLOW_BENIGN_ERRORS
-#    define png_benign_error png_warning
-#    define png_chunk_benign_error png_chunk_warning
-#  else
-#    define png_benign_error png_error
-#    define png_chunk_benign_error png_chunk_error
-#  endif
-#endif
-
-/* The png_set_<chunk> functions are for storing values in the png_info_struct.
- * Similarly, the png_get_<chunk> calls are used to read values from the
- * png_info_struct, either storing the parameters in the passed variables, or
- * setting pointers into the png_info_struct where the data is stored.  The
- * png_get_<chunk> functions return a non-zero value if the data was available
- * in info_ptr, or return zero and do not change any of the parameters if the
- * data was not available.
- *
- * These functions should be used instead of directly accessing png_info
- * to avoid problems with future changes in the size and internal layout of
- * png_info_struct.
- */
-/* Returns "flag" if chunk data is valid in info_ptr. */
-PNG_EXPORT(110, png_uint_32, png_get_valid, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, png_uint_32 flag));
-
-/* Returns number of bytes needed to hold a transformed row. */
-PNG_EXPORT(111, size_t, png_get_rowbytes, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-/* Returns row_pointers, which is an array of pointers to scanlines that was
- * returned from png_read_png().
- */
-PNG_EXPORT(112, png_bytepp, png_get_rows, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Set row_pointers, which is an array of pointers to scanlines for use
- * by png_write_png().
- */
-PNG_EXPORT(113, void, png_set_rows, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_bytepp row_pointers));
-#endif
-
-/* Returns number of color channels in image. */
-PNG_EXPORT(114, png_byte, png_get_channels, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-#ifdef PNG_EASY_ACCESS_SUPPORTED
-/* Returns image width in pixels. */
-PNG_EXPORT(115, png_uint_32, png_get_image_width, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image height in pixels. */
-PNG_EXPORT(116, png_uint_32, png_get_image_height, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image bit_depth. */
-PNG_EXPORT(117, png_byte, png_get_bit_depth, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image color_type. */
-PNG_EXPORT(118, png_byte, png_get_color_type, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image filter_type. */
-PNG_EXPORT(119, png_byte, png_get_filter_type, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image interlace_type. */
-PNG_EXPORT(120, png_byte, png_get_interlace_type, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image compression_type. */
-PNG_EXPORT(121, png_byte, png_get_compression_type, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-
-/* Returns image resolution in pixels per meter, from pHYs chunk data. */
-PNG_EXPORT(122, png_uint_32, png_get_pixels_per_meter,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(123, png_uint_32, png_get_x_pixels_per_meter,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(124, png_uint_32, png_get_y_pixels_per_meter,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-
-/* Returns pixel aspect ratio, computed from pHYs chunk data.  */
-PNG_FP_EXPORT(125, float, png_get_pixel_aspect_ratio,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr))
-PNG_FIXED_EXPORT(210, png_fixed_point, png_get_pixel_aspect_ratio_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr))
-
-/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
-PNG_EXPORT(126, png_int_32, png_get_x_offset_pixels,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(127, png_int_32, png_get_y_offset_pixels,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(128, png_int_32, png_get_x_offset_microns,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-PNG_EXPORT(129, png_int_32, png_get_y_offset_microns,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-
-#endif /* EASY_ACCESS */
-
-#ifdef PNG_READ_SUPPORTED
-/* Returns pointer to signature string read from PNG header */
-PNG_EXPORT(130, png_const_bytep, png_get_signature, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr));
-#endif
-
-#ifdef PNG_bKGD_SUPPORTED
-PNG_EXPORT(131, png_uint_32, png_get_bKGD, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_color_16p *background));
-#endif
-
-#ifdef PNG_bKGD_SUPPORTED
-PNG_EXPORT(132, void, png_set_bKGD, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_color_16p background));
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-PNG_FP_EXPORT(133, png_uint_32, png_get_cHRM, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, double *white_x, double *white_y, double *red_x,
-    double *red_y, double *green_x, double *green_y, double *blue_x,
-    double *blue_y))
-PNG_FP_EXPORT(230, png_uint_32, png_get_cHRM_XYZ, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, double *red_X, double *red_Y, double *red_Z,
-    double *green_X, double *green_Y, double *green_Z, double *blue_X,
-    double *blue_Y, double *blue_Z))
-PNG_FIXED_EXPORT(134, png_uint_32, png_get_cHRM_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *int_white_x, png_fixed_point *int_white_y,
-    png_fixed_point *int_red_x, png_fixed_point *int_red_y,
-    png_fixed_point *int_green_x, png_fixed_point *int_green_y,
-    png_fixed_point *int_blue_x, png_fixed_point *int_blue_y))
-PNG_FIXED_EXPORT(231, png_uint_32, png_get_cHRM_XYZ_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
-    png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
-    png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
-    png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
-    png_fixed_point *int_blue_Z))
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-PNG_FP_EXPORT(135, void, png_set_cHRM, (png_const_structrp png_ptr,
-    png_inforp info_ptr,
-    double white_x, double white_y, double red_x, double red_y, double green_x,
-    double green_y, double blue_x, double blue_y))
-PNG_FP_EXPORT(232, void, png_set_cHRM_XYZ, (png_const_structrp png_ptr,
-    png_inforp info_ptr, double red_X, double red_Y, double red_Z,
-    double green_X, double green_Y, double green_Z, double blue_X,
-    double blue_Y, double blue_Z))
-PNG_FIXED_EXPORT(136, void, png_set_cHRM_fixed, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_fixed_point int_white_x,
-    png_fixed_point int_white_y, png_fixed_point int_red_x,
-    png_fixed_point int_red_y, png_fixed_point int_green_x,
-    png_fixed_point int_green_y, png_fixed_point int_blue_x,
-    png_fixed_point int_blue_y))
-PNG_FIXED_EXPORT(233, void, png_set_cHRM_XYZ_fixed, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_fixed_point int_red_X, png_fixed_point int_red_Y,
-    png_fixed_point int_red_Z, png_fixed_point int_green_X,
-    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
-    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
-    png_fixed_point int_blue_Z))
-#endif
-
-#ifdef PNG_eXIf_SUPPORTED
-PNG_EXPORT(246, png_uint_32, png_get_eXIf, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_bytep *exif));
-PNG_EXPORT(247, void, png_set_eXIf, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_bytep exif));
-
-PNG_EXPORT(248, png_uint_32, png_get_eXIf_1, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, png_uint_32 *num_exif, png_bytep *exif));
-PNG_EXPORT(249, void, png_set_eXIf_1, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 num_exif, png_bytep exif));
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-PNG_FP_EXPORT(137, png_uint_32, png_get_gAMA, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, double *file_gamma))
-PNG_FIXED_EXPORT(138, png_uint_32, png_get_gAMA_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *int_file_gamma))
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-PNG_FP_EXPORT(139, void, png_set_gAMA, (png_const_structrp png_ptr,
-    png_inforp info_ptr, double file_gamma))
-PNG_FIXED_EXPORT(140, void, png_set_gAMA_fixed, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_fixed_point int_file_gamma))
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-PNG_EXPORT(141, png_uint_32, png_get_hIST, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_uint_16p *hist));
-PNG_EXPORT(142, void, png_set_hIST, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_uint_16p hist));
-#endif
-
-PNG_EXPORT(143, png_uint_32, png_get_IHDR, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, png_uint_32 *width, png_uint_32 *height,
-    int *bit_depth, int *color_type, int *interlace_method,
-    int *compression_method, int *filter_method));
-
-PNG_EXPORT(144, void, png_set_IHDR, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth,
-    int color_type, int interlace_method, int compression_method,
-    int filter_method));
-
-#ifdef PNG_oFFs_SUPPORTED
-PNG_EXPORT(145, png_uint_32, png_get_oFFs, (png_const_structrp png_ptr,
-   png_const_inforp info_ptr, png_int_32 *offset_x, png_int_32 *offset_y,
-   int *unit_type));
-#endif
-
-#ifdef PNG_oFFs_SUPPORTED
-PNG_EXPORT(146, void, png_set_oFFs, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_int_32 offset_x, png_int_32 offset_y,
-    int unit_type));
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-PNG_EXPORT(147, png_uint_32, png_get_pCAL, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_charp *purpose, png_int_32 *X0,
-    png_int_32 *X1, int *type, int *nparams, png_charp *units,
-    png_charpp *params));
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-PNG_EXPORT(148, void, png_set_pCAL, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_charp purpose, png_int_32 X0, png_int_32 X1,
-    int type, int nparams, png_const_charp units, png_charpp params));
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-PNG_EXPORT(149, png_uint_32, png_get_pHYs, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y,
-    int *unit_type));
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-PNG_EXPORT(150, void, png_set_pHYs, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type));
-#endif
-
-PNG_EXPORT(151, png_uint_32, png_get_PLTE, (png_const_structrp png_ptr,
-   png_inforp info_ptr, png_colorp *palette, int *num_palette));
-
-PNG_EXPORT(152, void, png_set_PLTE, (png_structrp png_ptr,
-    png_inforp info_ptr, png_const_colorp palette, int num_palette));
-
-#ifdef PNG_sBIT_SUPPORTED
-PNG_EXPORT(153, png_uint_32, png_get_sBIT, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_color_8p *sig_bit));
-#endif
-
-#ifdef PNG_sBIT_SUPPORTED
-PNG_EXPORT(154, void, png_set_sBIT, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_color_8p sig_bit));
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-PNG_EXPORT(155, png_uint_32, png_get_sRGB, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, int *file_srgb_intent));
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-PNG_EXPORT(156, void, png_set_sRGB, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int srgb_intent));
-PNG_EXPORT(157, void, png_set_sRGB_gAMA_and_cHRM, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int srgb_intent));
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-PNG_EXPORT(158, png_uint_32, png_get_iCCP, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_charpp name, int *compression_type,
-    png_bytepp profile, png_uint_32 *proflen));
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-PNG_EXPORT(159, void, png_set_iCCP, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_charp name, int compression_type,
-    png_const_bytep profile, png_uint_32 proflen));
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-PNG_EXPORT(160, int, png_get_sPLT, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_sPLT_tpp entries));
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-PNG_EXPORT(161, void, png_set_sPLT, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_sPLT_tp entries, int nentries));
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-/* png_get_text also returns the number of text chunks in *num_text */
-PNG_EXPORT(162, int, png_get_text, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_textp *text_ptr, int *num_text));
-#endif
-
-/* Note while png_set_text() will accept a structure whose text,
- * language, and  translated keywords are NULL pointers, the structure
- * returned by png_get_text will always contain regular
- * zero-terminated C strings.  They might be empty strings but
- * they will never be NULL pointers.
- */
-
-#ifdef PNG_TEXT_SUPPORTED
-PNG_EXPORT(163, void, png_set_text, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_textp text_ptr, int num_text));
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-PNG_EXPORT(164, png_uint_32, png_get_tIME, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_timep *mod_time));
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-PNG_EXPORT(165, void, png_set_tIME, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_timep mod_time));
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-PNG_EXPORT(166, png_uint_32, png_get_tRNS, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_bytep *trans_alpha, int *num_trans,
-    png_color_16p *trans_color));
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-PNG_EXPORT(167, void, png_set_tRNS, (png_structrp png_ptr,
-    png_inforp info_ptr, png_const_bytep trans_alpha, int num_trans,
-    png_const_color_16p trans_color));
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-PNG_FP_EXPORT(168, png_uint_32, png_get_sCAL, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, int *unit, double *width, double *height))
-#if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \
-   defined(PNG_FLOATING_POINT_SUPPORTED)
-/* NOTE: this API is currently implemented using floating point arithmetic,
- * consequently it can only be used on systems with floating point support.
- * In any case the range of values supported by png_fixed_point is small and it
- * is highly recommended that png_get_sCAL_s be used instead.
- */
-PNG_FIXED_EXPORT(214, png_uint_32, png_get_sCAL_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit,
-    png_fixed_point *width, png_fixed_point *height))
-#endif
-PNG_EXPORT(169, png_uint_32, png_get_sCAL_s,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit,
-    png_charpp swidth, png_charpp sheight));
-
-PNG_FP_EXPORT(170, void, png_set_sCAL, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int unit, double width, double height))
-PNG_FIXED_EXPORT(213, void, png_set_sCAL_fixed, (png_const_structrp png_ptr,
-   png_inforp info_ptr, int unit, png_fixed_point width,
-   png_fixed_point height))
-PNG_EXPORT(171, void, png_set_sCAL_s, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int unit,
-    png_const_charp swidth, png_const_charp sheight));
-#endif /* sCAL */
-
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-/* Provide the default handling for all unknown chunks or, optionally, for
- * specific unknown chunks.
- *
- * NOTE: prior to 1.6.0 the handling specified for particular chunks on read was
- * ignored and the default was used, the per-chunk setting only had an effect on
- * write.  If you wish to have chunk-specific handling on read in code that must
- * work on earlier versions you must use a user chunk callback to specify the
- * desired handling (keep or discard.)
- *
- * The 'keep' parameter is a PNG_HANDLE_CHUNK_ value as listed below.  The
- * parameter is interpreted as follows:
- *
- * READ:
- *    PNG_HANDLE_CHUNK_AS_DEFAULT:
- *       Known chunks: do normal libpng processing, do not keep the chunk (but
- *          see the comments below about PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
- *       Unknown chunks: for a specific chunk use the global default, when used
- *          as the default discard the chunk data.
- *    PNG_HANDLE_CHUNK_NEVER:
- *       Discard the chunk data.
- *    PNG_HANDLE_CHUNK_IF_SAFE:
- *       Keep the chunk data if the chunk is not critical else raise a chunk
- *       error.
- *    PNG_HANDLE_CHUNK_ALWAYS:
- *       Keep the chunk data.
- *
- * If the chunk data is saved it can be retrieved using png_get_unknown_chunks,
- * below.  Notice that specifying "AS_DEFAULT" as a global default is equivalent
- * to specifying "NEVER", however when "AS_DEFAULT" is used for specific chunks
- * it simply resets the behavior to the libpng default.
- *
- * INTERACTION WITH USER CHUNK CALLBACKS:
- * The per-chunk handling is always used when there is a png_user_chunk_ptr
- * callback and the callback returns 0; the chunk is then always stored *unless*
- * it is critical and the per-chunk setting is other than ALWAYS.  Notice that
- * the global default is *not* used in this case.  (In effect the per-chunk
- * value is incremented to at least IF_SAFE.)
- *
- * IMPORTANT NOTE: this behavior will change in libpng 1.7 - the global and
- * per-chunk defaults will be honored.  If you want to preserve the current
- * behavior when your callback returns 0 you must set PNG_HANDLE_CHUNK_IF_SAFE
- * as the default - if you don't do this libpng 1.6 will issue a warning.
- *
- * If you want unhandled unknown chunks to be discarded in libpng 1.6 and
- * earlier simply return '1' (handled).
- *
- * PNG_HANDLE_AS_UNKNOWN_SUPPORTED:
- *    If this is *not* set known chunks will always be handled by libpng and
- *    will never be stored in the unknown chunk list.  Known chunks listed to
- *    png_set_keep_unknown_chunks will have no effect.  If it is set then known
- *    chunks listed with a keep other than AS_DEFAULT will *never* be processed
- *    by libpng, in addition critical chunks must either be processed by the
- *    callback or saved.
- *
- *    The IHDR and IEND chunks must not be listed.  Because this turns off the
- *    default handling for chunks that would otherwise be recognized the
- *    behavior of libpng transformations may well become incorrect!
- *
- * WRITE:
- *    When writing chunks the options only apply to the chunks specified by
- *    png_set_unknown_chunks (below), libpng will *always* write known chunks
- *    required by png_set_ calls and will always write the core critical chunks
- *    (as required for PLTE).
- *
- *    Each chunk in the png_set_unknown_chunks list is looked up in the
- *    png_set_keep_unknown_chunks list to find the keep setting, this is then
- *    interpreted as follows:
- *
- *    PNG_HANDLE_CHUNK_AS_DEFAULT:
- *       Write safe-to-copy chunks and write other chunks if the global
- *       default is set to _ALWAYS, otherwise don't write this chunk.
- *    PNG_HANDLE_CHUNK_NEVER:
- *       Do not write the chunk.
- *    PNG_HANDLE_CHUNK_IF_SAFE:
- *       Write the chunk if it is safe-to-copy, otherwise do not write it.
- *    PNG_HANDLE_CHUNK_ALWAYS:
- *       Write the chunk.
- *
- * Note that the default behavior is effectively the opposite of the read case -
- * in read unknown chunks are not stored by default, in write they are written
- * by default.  Also the behavior of PNG_HANDLE_CHUNK_IF_SAFE is very different
- * - on write the safe-to-copy bit is checked, on read the critical bit is
- * checked and on read if the chunk is critical an error will be raised.
- *
- * num_chunks:
- * ===========
- *    If num_chunks is positive, then the "keep" parameter specifies the manner
- *    for handling only those chunks appearing in the chunk_list array,
- *    otherwise the chunk list array is ignored.
- *
- *    If num_chunks is 0 the "keep" parameter specifies the default behavior for
- *    unknown chunks, as described above.
- *
- *    If num_chunks is negative, then the "keep" parameter specifies the manner
- *    for handling all unknown chunks plus all chunks recognized by libpng
- *    except for the IHDR, PLTE, tRNS, IDAT, and IEND chunks (which continue to
- *    be processed by libpng.
- */
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-PNG_EXPORT(172, void, png_set_keep_unknown_chunks, (png_structrp png_ptr,
-    int keep, png_const_bytep chunk_list, int num_chunks));
-#endif /* HANDLE_AS_UNKNOWN */
-
-/* The "keep" PNG_HANDLE_CHUNK_ parameter for the specified chunk is returned;
- * the result is therefore true (non-zero) if special handling is required,
- * false for the default handling.
- */
-PNG_EXPORT(173, int, png_handle_as_unknown, (png_const_structrp png_ptr,
-    png_const_bytep chunk_name));
-#endif /* SET_UNKNOWN_CHUNKS */
-
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-PNG_EXPORT(174, void, png_set_unknown_chunks, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_unknown_chunkp unknowns,
-    int num_unknowns));
-   /* NOTE: prior to 1.6.0 this routine set the 'location' field of the added
-    * unknowns to the location currently stored in the png_struct.  This is
-    * invariably the wrong value on write.  To fix this call the following API
-    * for each chunk in the list with the correct location.  If you know your
-    * code won't be compiled on earlier versions you can rely on
-    * png_set_unknown_chunks(write-ptr, png_get_unknown_chunks(read-ptr)) doing
-    * the correct thing.
-    */
-
-PNG_EXPORT(175, void, png_set_unknown_chunk_location,
-    (png_const_structrp png_ptr, png_inforp info_ptr, int chunk, int location));
-
-PNG_EXPORT(176, int, png_get_unknown_chunks, (png_const_structrp png_ptr,
-    png_inforp info_ptr, png_unknown_chunkpp entries));
-#endif
-
-/* Png_free_data() will turn off the "valid" flag for anything it frees.
- * If you need to turn it off for a chunk that your application has freed,
- * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK);
- */
-PNG_EXPORT(177, void, png_set_invalid, (png_const_structrp png_ptr,
-    png_inforp info_ptr, int mask));
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-/* The "params" pointer is currently not used and is for future expansion. */
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-PNG_EXPORT(178, void, png_read_png, (png_structrp png_ptr, png_inforp info_ptr,
-    int transforms, png_voidp params));
-#endif
-#ifdef PNG_WRITE_SUPPORTED
-PNG_EXPORT(179, void, png_write_png, (png_structrp png_ptr, png_inforp info_ptr,
-    int transforms, png_voidp params));
-#endif
-#endif
-
-PNG_EXPORT(180, png_const_charp, png_get_copyright,
-    (png_const_structrp png_ptr));
-PNG_EXPORT(181, png_const_charp, png_get_header_ver,
-    (png_const_structrp png_ptr));
-PNG_EXPORT(182, png_const_charp, png_get_header_version,
-    (png_const_structrp png_ptr));
-PNG_EXPORT(183, png_const_charp, png_get_libpng_ver,
-    (png_const_structrp png_ptr));
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-PNG_EXPORT(184, png_uint_32, png_permit_mng_features, (png_structrp png_ptr,
-    png_uint_32 mng_features_permitted));
-#endif
-
-/* For use in png_set_keep_unknown, added to version 1.2.6 */
-#define PNG_HANDLE_CHUNK_AS_DEFAULT   0
-#define PNG_HANDLE_CHUNK_NEVER        1
-#define PNG_HANDLE_CHUNK_IF_SAFE      2
-#define PNG_HANDLE_CHUNK_ALWAYS       3
-#define PNG_HANDLE_CHUNK_LAST         4
-
-/* Strip the prepended error numbers ("#nnn ") from error and warning
- * messages before passing them to the error or warning handler.
- */
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-PNG_EXPORT(185, void, png_set_strip_error_numbers, (png_structrp png_ptr,
-    png_uint_32 strip_mode));
-#endif
-
-/* Added in libpng-1.2.6 */
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-PNG_EXPORT(186, void, png_set_user_limits, (png_structrp png_ptr,
-    png_uint_32 user_width_max, png_uint_32 user_height_max));
-PNG_EXPORT(187, png_uint_32, png_get_user_width_max,
-    (png_const_structrp png_ptr));
-PNG_EXPORT(188, png_uint_32, png_get_user_height_max,
-    (png_const_structrp png_ptr));
-/* Added in libpng-1.4.0 */
-PNG_EXPORT(189, void, png_set_chunk_cache_max, (png_structrp png_ptr,
-    png_uint_32 user_chunk_cache_max));
-PNG_EXPORT(190, png_uint_32, png_get_chunk_cache_max,
-    (png_const_structrp png_ptr));
-/* Added in libpng-1.4.1 */
-PNG_EXPORT(191, void, png_set_chunk_malloc_max, (png_structrp png_ptr,
-    png_alloc_size_t user_chunk_cache_max));
-PNG_EXPORT(192, png_alloc_size_t, png_get_chunk_malloc_max,
-    (png_const_structrp png_ptr));
-#endif
-
-#if defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-PNG_EXPORT(193, png_uint_32, png_get_pixels_per_inch,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-
-PNG_EXPORT(194, png_uint_32, png_get_x_pixels_per_inch,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-
-PNG_EXPORT(195, png_uint_32, png_get_y_pixels_per_inch,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr));
-
-PNG_FP_EXPORT(196, float, png_get_x_offset_inches,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr))
-#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
-PNG_FIXED_EXPORT(211, png_fixed_point, png_get_x_offset_inches_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr))
-#endif
-
-PNG_FP_EXPORT(197, float, png_get_y_offset_inches, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr))
-#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
-PNG_FIXED_EXPORT(212, png_fixed_point, png_get_y_offset_inches_fixed,
-    (png_const_structrp png_ptr, png_const_inforp info_ptr))
-#endif
-
-#  ifdef PNG_pHYs_SUPPORTED
-PNG_EXPORT(198, png_uint_32, png_get_pHYs_dpi, (png_const_structrp png_ptr,
-    png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y,
-    int *unit_type));
-#  endif /* pHYs */
-#endif  /* INCH_CONVERSIONS */
-
-/* Added in libpng-1.4.0 */
-#ifdef PNG_IO_STATE_SUPPORTED
-PNG_EXPORT(199, png_uint_32, png_get_io_state, (png_const_structrp png_ptr));
-
-/* Removed from libpng 1.6; use png_get_io_chunk_type. */
-PNG_REMOVED(200, png_const_bytep, png_get_io_chunk_name, (png_structrp png_ptr),
-    PNG_DEPRECATED)
-
-PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type,
-    (png_const_structrp png_ptr));
-
-/* The flags returned by png_get_io_state() are the following: */
-#  define PNG_IO_NONE        0x0000   /* no I/O at this moment */
-#  define PNG_IO_READING     0x0001   /* currently reading */
-#  define PNG_IO_WRITING     0x0002   /* currently writing */
-#  define PNG_IO_SIGNATURE   0x0010   /* currently at the file signature */
-#  define PNG_IO_CHUNK_HDR   0x0020   /* currently at the chunk header */
-#  define PNG_IO_CHUNK_DATA  0x0040   /* currently at the chunk data */
-#  define PNG_IO_CHUNK_CRC   0x0080   /* currently at the chunk crc */
-#  define PNG_IO_MASK_OP     0x000f   /* current operation: reading/writing */
-#  define PNG_IO_MASK_LOC    0x00f0   /* current location: sig/hdr/data/crc */
-#endif /* IO_STATE */
-
-/* Interlace support.  The following macros are always defined so that if
- * libpng interlace handling is turned off the macros may be used to handle
- * interlaced images within the application.
- */
-#define PNG_INTERLACE_ADAM7_PASSES 7
-
-/* Two macros to return the first row and first column of the original,
- * full, image which appears in a given pass.  'pass' is in the range 0
- * to 6 and the result is in the range 0 to 7.
- */
-#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7)
-#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7)
-
-/* A macro to return the offset between pixels in the output row for a pair of
- * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that
- * follows.  Note that ROW_OFFSET is the offset from one row to the next whereas
- * COL_OFFSET is from one column to the next, within a row.
- */
-#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8)
-#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1))
-
-/* Two macros to help evaluate the number of rows or columns in each
- * pass.  This is expressed as a shift - effectively log2 of the number or
- * rows or columns in each 8x8 tile of the original image.
- */
-#define PNG_PASS_ROW_SHIFT(pass) ((pass)>2?(8-(pass))>>1:3)
-#define PNG_PASS_COL_SHIFT(pass) ((pass)>1?(7-(pass))>>1:3)
-
-/* Hence two macros to determine the number of rows or columns in a given
- * pass of an image given its height or width.  In fact these macros may
- * return non-zero even though the sub-image is empty, because the other
- * dimension may be empty for a small image.
- */
-#define PNG_PASS_ROWS(height, pass) (((height)+(((1<<PNG_PASS_ROW_SHIFT(pass))\
-   -1)-PNG_PASS_START_ROW(pass)))>>PNG_PASS_ROW_SHIFT(pass))
-#define PNG_PASS_COLS(width, pass) (((width)+(((1<<PNG_PASS_COL_SHIFT(pass))\
-   -1)-PNG_PASS_START_COL(pass)))>>PNG_PASS_COL_SHIFT(pass))
-
-/* For the reader row callbacks (both progressive and sequential) it is
- * necessary to find the row in the output image given a row in an interlaced
- * image, so two more macros:
- */
-#define PNG_ROW_FROM_PASS_ROW(y_in, pass) \
-   (((y_in)<<PNG_PASS_ROW_SHIFT(pass))+PNG_PASS_START_ROW(pass))
-#define PNG_COL_FROM_PASS_COL(x_in, pass) \
-   (((x_in)<<PNG_PASS_COL_SHIFT(pass))+PNG_PASS_START_COL(pass))
-
-/* Two macros which return a boolean (0 or 1) saying whether the given row
- * or column is in a particular pass.  These use a common utility macro that
- * returns a mask for a given pass - the offset 'off' selects the row or
- * column version.  The mask has the appropriate bit set for each column in
- * the tile.
- */
-#define PNG_PASS_MASK(pass,off) ( \
-   ((0x110145AF>>(((7-(off))-(pass))<<2)) & 0xF) | \
-   ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0))
-
-#define PNG_ROW_IN_INTERLACE_PASS(y, pass) \
-   ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1)
-#define PNG_COL_IN_INTERLACE_PASS(x, pass) \
-   ((PNG_PASS_MASK(pass,1) >> ((x)&7)) & 1)
-
-#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
-/* With these routines we avoid an integer divide, which will be slower on
- * most machines.  However, it does take more operations than the corresponding
- * divide method, so it may be slower on a few RISC systems.  There are two
- * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
- *
- * Note that the rounding factors are NOT supposed to be the same!  128 and
- * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
- * standard method.
- *
- * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
- */
-
- /* fg and bg should be in `gamma 1.0' space; alpha is the opacity */
-
-#  define png_composite(composite, fg, alpha, bg)        \
-   {                                                     \
-      png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \
-          * (png_uint_16)(alpha)                         \
-          + (png_uint_16)(bg)*(png_uint_16)(255          \
-          - (png_uint_16)(alpha)) + 128);                \
-      (composite) = (png_byte)(((temp + (temp >> 8)) >> 8) & 0xff); \
-   }
-
-#  define png_composite_16(composite, fg, alpha, bg)     \
-   {                                                     \
-      png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) \
-          * (png_uint_32)(alpha)                         \
-          + (png_uint_32)(bg)*(65535                     \
-          - (png_uint_32)(alpha)) + 32768);              \
-      (composite) = (png_uint_16)(0xffff & ((temp + (temp >> 16)) >> 16)); \
-   }
-
-#else  /* Standard method using integer division */
-
-#  define png_composite(composite, fg, alpha, bg)                      \
-   (composite) =                                                       \
-       (png_byte)(0xff & (((png_uint_16)(fg) * (png_uint_16)(alpha) +  \
-       (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) + \
-       127) / 255))
-
-#  define png_composite_16(composite, fg, alpha, bg)                       \
-   (composite) =                                                           \
-       (png_uint_16)(0xffff & (((png_uint_32)(fg) * (png_uint_32)(alpha) + \
-       (png_uint_32)(bg)*(png_uint_32)(65535 - (png_uint_32)(alpha)) +     \
-       32767) / 65535))
-#endif /* READ_COMPOSITE_NODIV */
-
-#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
-PNG_EXPORT(201, png_uint_32, png_get_uint_32, (png_const_bytep buf));
-PNG_EXPORT(202, png_uint_16, png_get_uint_16, (png_const_bytep buf));
-PNG_EXPORT(203, png_int_32, png_get_int_32, (png_const_bytep buf));
-#endif
-
-PNG_EXPORT(204, png_uint_32, png_get_uint_31, (png_const_structrp png_ptr,
-    png_const_bytep buf));
-/* No png_get_int_16 -- may be added if there's a real need for it. */
-
-/* Place a 32-bit number into a buffer in PNG byte order (big-endian). */
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-PNG_EXPORT(205, void, png_save_uint_32, (png_bytep buf, png_uint_32 i));
-#endif
-#ifdef PNG_SAVE_INT_32_SUPPORTED
-PNG_EXPORT(206, void, png_save_int_32, (png_bytep buf, png_int_32 i));
-#endif
-
-/* Place a 16-bit number into a buffer in PNG byte order.
- * The parameter is declared unsigned int, not png_uint_16,
- * just to avoid potential problems on pre-ANSI C compilers.
- */
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-PNG_EXPORT(207, void, png_save_uint_16, (png_bytep buf, unsigned int i));
-/* No png_save_int_16 -- may be added if there's a real need for it. */
-#endif
-
-#ifdef PNG_USE_READ_MACROS
-/* Inline macros to do direct reads of bytes from the input buffer.
- * The png_get_int_32() routine assumes we are using two's complement
- * format for negative values, which is almost certainly true.
- */
-#  define PNG_get_uint_32(buf) \
-   (((png_uint_32)(*(buf)) << 24) + \
-    ((png_uint_32)(*((buf) + 1)) << 16) + \
-    ((png_uint_32)(*((buf) + 2)) << 8) + \
-    ((png_uint_32)(*((buf) + 3))))
-
-   /* From libpng-1.4.0 until 1.4.4, the png_get_uint_16 macro (but not the
-    * function) incorrectly returned a value of type png_uint_32.
-    */
-#  define PNG_get_uint_16(buf) \
-   ((png_uint_16) \
-    (((unsigned int)(*(buf)) << 8) + \
-    ((unsigned int)(*((buf) + 1)))))
-
-#  define PNG_get_int_32(buf) \
-   ((png_int_32)((*(buf) & 0x80) \
-    ? -((png_int_32)(((png_get_uint_32(buf)^0xffffffffU)+1U)&0x7fffffffU)) \
-    : (png_int_32)png_get_uint_32(buf)))
-
-/* If PNG_PREFIX is defined the same thing as below happens in pnglibconf.h,
- * but defining a macro name prefixed with PNG_PREFIX.
- */
-#  ifndef PNG_PREFIX
-#    define png_get_uint_32(buf) PNG_get_uint_32(buf)
-#    define png_get_uint_16(buf) PNG_get_uint_16(buf)
-#    define png_get_int_32(buf)  PNG_get_int_32(buf)
-#  endif
-#else
-#  ifdef PNG_PREFIX
-   /* No macros; revert to the (redefined) function */
-#    define PNG_get_uint_32 (png_get_uint_32)
-#    define PNG_get_uint_16 (png_get_uint_16)
-#    define PNG_get_int_32  (png_get_int_32)
-#  endif
-#endif
-
-#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-PNG_EXPORT(242, void, png_set_check_for_invalid_index,
-    (png_structrp png_ptr, int allowed));
-#  ifdef PNG_GET_PALETTE_MAX_SUPPORTED
-PNG_EXPORT(243, int, png_get_palette_max, (png_const_structp png_ptr,
-    png_const_infop info_ptr));
-#  endif
-#endif /* CHECK_FOR_INVALID_INDEX */
-
-/*******************************************************************************
- * Section 5: SIMPLIFIED API
- *******************************************************************************
- *
- * Please read the documentation in libpng-manual.txt (TODO: write said
- * documentation) if you don't understand what follows.
- *
- * The simplified API hides the details of both libpng and the PNG file format
- * itself.  It allows PNG files to be read into a very limited number of
- * in-memory bitmap formats or to be written from the same formats.  If these
- * formats do not accommodate your needs then you can, and should, use the more
- * sophisticated APIs above - these support a wide variety of in-memory formats
- * and a wide variety of sophisticated transformations to those formats as well
- * as a wide variety of APIs to manipulate ancillary information.
- *
- * To read a PNG file using the simplified API:
- *
- * 1) Declare a 'png_image' structure (see below) on the stack, set the
- *    version field to PNG_IMAGE_VERSION and the 'opaque' pointer to NULL
- *    (this is REQUIRED, your program may crash if you don't do it.)
- * 2) Call the appropriate png_image_begin_read... function.
- * 3) Set the png_image 'format' member to the required sample format.
- * 4) Allocate a buffer for the image and, if required, the color-map.
- * 5) Call png_image_finish_read to read the image and, if required, the
- *    color-map into your buffers.
- *
- * There are no restrictions on the format of the PNG input itself; all valid
- * color types, bit depths, and interlace methods are acceptable, and the
- * input image is transformed as necessary to the requested in-memory format
- * during the png_image_finish_read() step.  The only caveat is that if you
- * request a color-mapped image from a PNG that is full-color or makes
- * complex use of an alpha channel the transformation is extremely lossy and the
- * result may look terrible.
- *
- * To write a PNG file using the simplified API:
- *
- * 1) Declare a 'png_image' structure on the stack and memset() it to all zero.
- * 2) Initialize the members of the structure that describe the image, setting
- *    the 'format' member to the format of the image samples.
- * 3) Call the appropriate png_image_write... function with a pointer to the
- *    image and, if necessary, the color-map to write the PNG data.
- *
- * png_image is a structure that describes the in-memory format of an image
- * when it is being read or defines the in-memory format of an image that you
- * need to write:
- */
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) || \
-    defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-
-#define PNG_IMAGE_VERSION 1
-
-typedef struct png_control *png_controlp;
-typedef struct
-{
-   png_controlp opaque;    /* Initialize to NULL, free with png_image_free */
-   png_uint_32  version;   /* Set to PNG_IMAGE_VERSION */
-   png_uint_32  width;     /* Image width in pixels (columns) */
-   png_uint_32  height;    /* Image height in pixels (rows) */
-   png_uint_32  format;    /* Image format as defined below */
-   png_uint_32  flags;     /* A bit mask containing informational flags */
-   png_uint_32  colormap_entries;
-                           /* Number of entries in the color-map */
-
-   /* In the event of an error or warning the following field will be set to a
-    * non-zero value and the 'message' field will contain a '\0' terminated
-    * string with the libpng error or warning message.  If both warnings and
-    * an error were encountered, only the error is recorded.  If there
-    * are multiple warnings, only the first one is recorded.
-    *
-    * The upper 30 bits of this value are reserved, the low two bits contain
-    * a value as follows:
-    */
-#  define PNG_IMAGE_WARNING 1
-#  define PNG_IMAGE_ERROR 2
-   /*
-    * The result is a two-bit code such that a value more than 1 indicates
-    * a failure in the API just called:
-    *
-    *    0 - no warning or error
-    *    1 - warning
-    *    2 - error
-    *    3 - error preceded by warning
-    */
-#  define PNG_IMAGE_FAILED(png_cntrl) ((((png_cntrl).warning_or_error)&0x03)>1)
-
-   png_uint_32  warning_or_error;
-
-   char         message[64];
-} png_image, *png_imagep;
-
-/* The samples of the image have one to four channels whose components have
- * original values in the range 0 to 1.0:
- *
- * 1: A single gray or luminance channel (G).
- * 2: A gray/luminance channel and an alpha channel (GA).
- * 3: Three red, green, blue color channels (RGB).
- * 4: Three color channels and an alpha channel (RGBA).
- *
- * The components are encoded in one of two ways:
- *
- * a) As a small integer, value 0..255, contained in a single byte.  For the
- * alpha channel the original value is simply value/255.  For the color or
- * luminance channels the value is encoded according to the sRGB specification
- * and matches the 8-bit format expected by typical display devices.
- *
- * The color/gray channels are not scaled (pre-multiplied) by the alpha
- * channel and are suitable for passing to color management software.
- *
- * b) As a value in the range 0..65535, contained in a 2-byte integer.  All
- * channels can be converted to the original value by dividing by 65535; all
- * channels are linear.  Color channels use the RGB encoding (RGB end-points) of
- * the sRGB specification.  This encoding is identified by the
- * PNG_FORMAT_FLAG_LINEAR flag below.
- *
- * When the simplified API needs to convert between sRGB and linear colorspaces,
- * the actual sRGB transfer curve defined in the sRGB specification (see the
- * article at <https://en.wikipedia.org/wiki/SRGB>) is used, not the gamma=1/2.2
- * approximation used elsewhere in libpng.
- *
- * When an alpha channel is present it is expected to denote pixel coverage
- * of the color or luminance channels and is returned as an associated alpha
- * channel: the color/gray channels are scaled (pre-multiplied) by the alpha
- * value.
- *
- * The samples are either contained directly in the image data, between 1 and 8
- * bytes per pixel according to the encoding, or are held in a color-map indexed
- * by bytes in the image data.  In the case of a color-map the color-map entries
- * are individual samples, encoded as above, and the image data has one byte per
- * pixel to select the relevant sample from the color-map.
- */
-
-/* PNG_FORMAT_*
- *
- * #defines to be used in png_image::format.  Each #define identifies a
- * particular layout of sample data and, if present, alpha values.  There are
- * separate defines for each of the two component encodings.
- *
- * A format is built up using single bit flag values.  All combinations are
- * valid.  Formats can be built up from the flag values or you can use one of
- * the predefined values below.  When testing formats always use the FORMAT_FLAG
- * macros to test for individual features - future versions of the library may
- * add new flags.
- *
- * When reading or writing color-mapped images the format should be set to the
- * format of the entries in the color-map then png_image_{read,write}_colormap
- * called to read or write the color-map and set the format correctly for the
- * image data.  Do not set the PNG_FORMAT_FLAG_COLORMAP bit directly!
- *
- * NOTE: libpng can be built with particular features disabled. If you see
- * compiler errors because the definition of one of the following flags has been
- * compiled out it is because libpng does not have the required support.  It is
- * possible, however, for the libpng configuration to enable the format on just
- * read or just write; in that case you may see an error at run time.  You can
- * guard against this by checking for the definition of the appropriate
- * "_SUPPORTED" macro, one of:
- *
- *    PNG_SIMPLIFIED_{READ,WRITE}_{BGR,AFIRST}_SUPPORTED
- */
-#define PNG_FORMAT_FLAG_ALPHA    0x01U /* format with an alpha channel */
-#define PNG_FORMAT_FLAG_COLOR    0x02U /* color format: otherwise grayscale */
-#define PNG_FORMAT_FLAG_LINEAR   0x04U /* 2-byte channels else 1-byte */
-#define PNG_FORMAT_FLAG_COLORMAP 0x08U /* image data is color-mapped */
-
-#ifdef PNG_FORMAT_BGR_SUPPORTED
-#  define PNG_FORMAT_FLAG_BGR    0x10U /* BGR colors, else order is RGB */
-#endif
-
-#ifdef PNG_FORMAT_AFIRST_SUPPORTED
-#  define PNG_FORMAT_FLAG_AFIRST 0x20U /* alpha channel comes first */
-#endif
-
-#define PNG_FORMAT_FLAG_ASSOCIATED_ALPHA 0x40U /* alpha channel is associated */
-
-/* Commonly used formats have predefined macros.
- *
- * First the single byte (sRGB) formats:
- */
-#define PNG_FORMAT_GRAY 0
-#define PNG_FORMAT_GA   PNG_FORMAT_FLAG_ALPHA
-#define PNG_FORMAT_AG   (PNG_FORMAT_GA|PNG_FORMAT_FLAG_AFIRST)
-#define PNG_FORMAT_RGB  PNG_FORMAT_FLAG_COLOR
-#define PNG_FORMAT_BGR  (PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_BGR)
-#define PNG_FORMAT_RGBA (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_ALPHA)
-#define PNG_FORMAT_ARGB (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_AFIRST)
-#define PNG_FORMAT_BGRA (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_ALPHA)
-#define PNG_FORMAT_ABGR (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_AFIRST)
-
-/* Then the linear 2-byte formats.  When naming these "Y" is used to
- * indicate a luminance (gray) channel.
- */
-#define PNG_FORMAT_LINEAR_Y PNG_FORMAT_FLAG_LINEAR
-#define PNG_FORMAT_LINEAR_Y_ALPHA (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_ALPHA)
-#define PNG_FORMAT_LINEAR_RGB (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR)
-#define PNG_FORMAT_LINEAR_RGB_ALPHA \
-   (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA)
-
-/* With color-mapped formats the image data is one byte for each pixel, the byte
- * is an index into the color-map which is formatted as above.  To obtain a
- * color-mapped format it is sufficient just to add the PNG_FOMAT_FLAG_COLORMAP
- * to one of the above definitions, or you can use one of the definitions below.
- */
-#define PNG_FORMAT_RGB_COLORMAP  (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_COLORMAP)
-#define PNG_FORMAT_BGR_COLORMAP  (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_COLORMAP)
-#define PNG_FORMAT_RGBA_COLORMAP (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_COLORMAP)
-#define PNG_FORMAT_ARGB_COLORMAP (PNG_FORMAT_ARGB|PNG_FORMAT_FLAG_COLORMAP)
-#define PNG_FORMAT_BGRA_COLORMAP (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_COLORMAP)
-#define PNG_FORMAT_ABGR_COLORMAP (PNG_FORMAT_ABGR|PNG_FORMAT_FLAG_COLORMAP)
-
-/* PNG_IMAGE macros
- *
- * These are convenience macros to derive information from a png_image
- * structure.  The PNG_IMAGE_SAMPLE_ macros return values appropriate to the
- * actual image sample values - either the entries in the color-map or the
- * pixels in the image.  The PNG_IMAGE_PIXEL_ macros return corresponding values
- * for the pixels and will always return 1 for color-mapped formats.  The
- * remaining macros return information about the rows in the image and the
- * complete image.
- *
- * NOTE: All the macros that take a png_image::format parameter are compile time
- * constants if the format parameter is, itself, a constant.  Therefore these
- * macros can be used in array declarations and case labels where required.
- * Similarly the macros are also pre-processor constants (sizeof is not used) so
- * they can be used in #if tests.
- *
- * First the information about the samples.
- */
-#define PNG_IMAGE_SAMPLE_CHANNELS(fmt)\
-   (((fmt)&(PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA))+1)
-   /* Return the total number of channels in a given format: 1..4 */
-
-#define PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt)\
-   ((((fmt) & PNG_FORMAT_FLAG_LINEAR) >> 2)+1)
-   /* Return the size in bytes of a single component of a pixel or color-map
-    * entry (as appropriate) in the image: 1 or 2.
-    */
-
-#define PNG_IMAGE_SAMPLE_SIZE(fmt)\
-   (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt))
-   /* This is the size of the sample data for one sample.  If the image is
-    * color-mapped it is the size of one color-map entry (and image pixels are
-    * one byte in size), otherwise it is the size of one image pixel.
-    */
-
-#define PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(fmt)\
-   (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * 256)
-   /* The maximum size of the color-map required by the format expressed in a
-    * count of components.  This can be used to compile-time allocate a
-    * color-map:
-    *
-    * png_uint_16 colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(linear_fmt)];
-    *
-    * png_byte colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(sRGB_fmt)];
-    *
-    * Alternatively use the PNG_IMAGE_COLORMAP_SIZE macro below to use the
-    * information from one of the png_image_begin_read_ APIs and dynamically
-    * allocate the required memory.
-    */
-
-/* Corresponding information about the pixels */
-#define PNG_IMAGE_PIXEL_(test,fmt)\
-   (((fmt)&PNG_FORMAT_FLAG_COLORMAP)?1:test(fmt))
-
-#define PNG_IMAGE_PIXEL_CHANNELS(fmt)\
-   PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_CHANNELS,fmt)
-   /* The number of separate channels (components) in a pixel; 1 for a
-    * color-mapped image.
-    */
-
-#define PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)\
-   PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_COMPONENT_SIZE,fmt)
-   /* The size, in bytes, of each component in a pixel; 1 for a color-mapped
-    * image.
-    */
-
-#define PNG_IMAGE_PIXEL_SIZE(fmt) PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_SIZE,fmt)
-   /* The size, in bytes, of a complete pixel; 1 for a color-mapped image. */
-
-/* Information about the whole row, or whole image */
-#define PNG_IMAGE_ROW_STRIDE(image)\
-   (PNG_IMAGE_PIXEL_CHANNELS((image).format) * (image).width)
-   /* Return the total number of components in a single row of the image; this
-    * is the minimum 'row stride', the minimum count of components between each
-    * row.  For a color-mapped image this is the minimum number of bytes in a
-    * row.
-    *
-    * WARNING: this macro overflows for some images with more than one component
-    * and very large image widths.  libpng will refuse to process an image where
-    * this macro would overflow.
-    */
-
-#define PNG_IMAGE_BUFFER_SIZE(image, row_stride)\
-   (PNG_IMAGE_PIXEL_COMPONENT_SIZE((image).format)*(image).height*(row_stride))
-   /* Return the size, in bytes, of an image buffer given a png_image and a row
-    * stride - the number of components to leave space for in each row.
-    *
-    * WARNING: this macro overflows a 32-bit integer for some large PNG images,
-    * libpng will refuse to process an image where such an overflow would occur.
-    */
-
-#define PNG_IMAGE_SIZE(image)\
-   PNG_IMAGE_BUFFER_SIZE(image, PNG_IMAGE_ROW_STRIDE(image))
-   /* Return the size, in bytes, of the image in memory given just a png_image;
-    * the row stride is the minimum stride required for the image.
-    */
-
-#define PNG_IMAGE_COLORMAP_SIZE(image)\
-   (PNG_IMAGE_SAMPLE_SIZE((image).format) * (image).colormap_entries)
-   /* Return the size, in bytes, of the color-map of this image.  If the image
-    * format is not a color-map format this will return a size sufficient for
-    * 256 entries in the given format; check PNG_FORMAT_FLAG_COLORMAP if
-    * you don't want to allocate a color-map in this case.
-    */
-
-/* PNG_IMAGE_FLAG_*
- *
- * Flags containing additional information about the image are held in the
- * 'flags' field of png_image.
- */
-#define PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB 0x01
-   /* This indicates that the RGB values of the in-memory bitmap do not
-    * correspond to the red, green and blue end-points defined by sRGB.
-    */
-
-#define PNG_IMAGE_FLAG_FAST 0x02
-   /* On write emphasise speed over compression; the resultant PNG file will be
-    * larger but will be produced significantly faster, particular for large
-    * images.  Do not use this option for images which will be distributed, only
-    * used it when producing intermediate files that will be read back in
-    * repeatedly.  For a typical 24-bit image the option will double the read
-    * speed at the cost of increasing the image size by 25%, however for many
-    * more compressible images the PNG file can be 10 times larger with only a
-    * slight speed gain.
-    */
-
-#define PNG_IMAGE_FLAG_16BIT_sRGB 0x04
-   /* On read if the image is a 16-bit per component image and there is no gAMA
-    * or sRGB chunk assume that the components are sRGB encoded.  Notice that
-    * images output by the simplified API always have gamma information; setting
-    * this flag only affects the interpretation of 16-bit images from an
-    * external source.  It is recommended that the application expose this flag
-    * to the user; the user can normally easily recognize the difference between
-    * linear and sRGB encoding.  This flag has no effect on write - the data
-    * passed to the write APIs must have the correct encoding (as defined
-    * above.)
-    *
-    * If the flag is not set (the default) input 16-bit per component data is
-    * assumed to be linear.
-    *
-    * NOTE: the flag can only be set after the png_image_begin_read_ call,
-    * because that call initializes the 'flags' field.
-    */
-
-#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
-/* READ APIs
- * ---------
- *
- * The png_image passed to the read APIs must have been initialized by setting
- * the png_controlp field 'opaque' to NULL (or, safer, memset the whole thing.)
- */
-#ifdef PNG_STDIO_SUPPORTED
-PNG_EXPORT(234, int, png_image_begin_read_from_file, (png_imagep image,
-   const char *file_name));
-   /* The named file is opened for read and the image header is filled in
-    * from the PNG header in the file.
-    */
-
-PNG_EXPORT(235, int, png_image_begin_read_from_stdio, (png_imagep image,
-   FILE* file));
-   /* The PNG header is read from the stdio FILE object. */
-#endif /* STDIO */
-
-PNG_EXPORT(236, int, png_image_begin_read_from_memory, (png_imagep image,
-   png_const_voidp memory, size_t size));
-   /* The PNG header is read from the given memory buffer. */
-
-PNG_EXPORT(237, int, png_image_finish_read, (png_imagep image,
-   png_const_colorp background, void *buffer, png_int_32 row_stride,
-   void *colormap));
-   /* Finish reading the image into the supplied buffer and clean up the
-    * png_image structure.
-    *
-    * row_stride is the step, in byte or 2-byte units as appropriate,
-    * between adjacent rows.  A positive stride indicates that the top-most row
-    * is first in the buffer - the normal top-down arrangement.  A negative
-    * stride indicates that the bottom-most row is first in the buffer.
-    *
-    * background need only be supplied if an alpha channel must be removed from
-    * a png_byte format and the removal is to be done by compositing on a solid
-    * color; otherwise it may be NULL and any composition will be done directly
-    * onto the buffer.  The value is an sRGB color to use for the background,
-    * for grayscale output the green channel is used.
-    *
-    * background must be supplied when an alpha channel must be removed from a
-    * single byte color-mapped output format, in other words if:
-    *
-    * 1) The original format from png_image_begin_read_from_* had
-    *    PNG_FORMAT_FLAG_ALPHA set.
-    * 2) The format set by the application does not.
-    * 3) The format set by the application has PNG_FORMAT_FLAG_COLORMAP set and
-    *    PNG_FORMAT_FLAG_LINEAR *not* set.
-    *
-    * For linear output removing the alpha channel is always done by compositing
-    * on black and background is ignored.
-    *
-    * colormap must be supplied when PNG_FORMAT_FLAG_COLORMAP is set.  It must
-    * be at least the size (in bytes) returned by PNG_IMAGE_COLORMAP_SIZE.
-    * image->colormap_entries will be updated to the actual number of entries
-    * written to the colormap; this may be less than the original value.
-    */
-
-PNG_EXPORT(238, void, png_image_free, (png_imagep image));
-   /* Free any data allocated by libpng in image->opaque, setting the pointer to
-    * NULL.  May be called at any time after the structure is initialized.
-    */
-#endif /* SIMPLIFIED_READ */
-
-#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
-/* WRITE APIS
- * ----------
- * For write you must initialize a png_image structure to describe the image to
- * be written.  To do this use memset to set the whole structure to 0 then
- * initialize fields describing your image.
- *
- * version: must be set to PNG_IMAGE_VERSION
- * opaque: must be initialized to NULL
- * width: image width in pixels
- * height: image height in rows
- * format: the format of the data (image and color-map) you wish to write
- * flags: set to 0 unless one of the defined flags applies; set
- *    PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB for color format images where the RGB
- *    values do not correspond to the colors in sRGB.
- * colormap_entries: set to the number of entries in the color-map (0 to 256)
- */
-#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
-PNG_EXPORT(239, int, png_image_write_to_file, (png_imagep image,
-   const char *file, int convert_to_8bit, const void *buffer,
-   png_int_32 row_stride, const void *colormap));
-   /* Write the image to the named file. */
-
-PNG_EXPORT(240, int, png_image_write_to_stdio, (png_imagep image, FILE *file,
-   int convert_to_8_bit, const void *buffer, png_int_32 row_stride,
-   const void *colormap));
-   /* Write the image to the given (FILE*). */
-#endif /* SIMPLIFIED_WRITE_STDIO */
-
-/* With all write APIs if image is in one of the linear formats with 16-bit
- * data then setting convert_to_8_bit will cause the output to be an 8-bit PNG
- * gamma encoded according to the sRGB specification, otherwise a 16-bit linear
- * encoded PNG file is written.
- *
- * With color-mapped data formats the colormap parameter point to a color-map
- * with at least image->colormap_entries encoded in the specified format.  If
- * the format is linear the written PNG color-map will be converted to sRGB
- * regardless of the convert_to_8_bit flag.
- *
- * With all APIs row_stride is handled as in the read APIs - it is the spacing
- * from one row to the next in component sized units (1 or 2 bytes) and if
- * negative indicates a bottom-up row layout in the buffer.  If row_stride is
- * zero, libpng will calculate it for you from the image width and number of
- * channels.
- *
- * Note that the write API does not support interlacing, sub-8-bit pixels or
- * most ancillary chunks.  If you need to write text chunks (e.g. for copyright
- * notices) you need to use one of the other APIs.
- */
-
-PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory,
-   png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8_bit,
-   const void *buffer, png_int_32 row_stride, const void *colormap));
-   /* Write the image to the given memory buffer.  The function both writes the
-    * whole PNG data stream to *memory and updates *memory_bytes with the count
-    * of bytes written.
-    *
-    * 'memory' may be NULL.  In this case *memory_bytes is not read however on
-    * success the number of bytes which would have been written will still be
-    * stored in *memory_bytes.  On failure *memory_bytes will contain 0.
-    *
-    * If 'memory' is not NULL it must point to memory[*memory_bytes] of
-    * writeable memory.
-    *
-    * If the function returns success memory[*memory_bytes] (if 'memory' is not
-    * NULL) contains the written PNG data.  *memory_bytes will always be less
-    * than or equal to the original value.
-    *
-    * If the function returns false and *memory_bytes was not changed an error
-    * occurred during write.  If *memory_bytes was changed, or is not 0 if
-    * 'memory' was NULL, the write would have succeeded but for the memory
-    * buffer being too small.  *memory_bytes contains the required number of
-    * bytes and will be bigger that the original value.
-    */
-
-#define png_image_write_get_memory_size(image, size, convert_to_8_bit, buffer,\
-   row_stride, colormap)\
-   png_image_write_to_memory(&(image), 0, &(size), convert_to_8_bit, buffer,\
-         row_stride, colormap)
-   /* Return the amount of memory in 'size' required to compress this image.
-    * The png_image structure 'image' must be filled in as in the above
-    * function and must not be changed before the actual write call, the buffer
-    * and all other parameters must also be identical to that in the final
-    * write call.  The 'size' variable need not be initialized.
-    *
-    * NOTE: the macro returns true/false, if false is returned 'size' will be
-    * set to zero and the write failed and probably will fail if tried again.
-    */
-
-/* You can pre-allocate the buffer by making sure it is of sufficient size
- * regardless of the amount of compression achieved.  The buffer size will
- * always be bigger than the original image and it will never be filled.  The
- * following macros are provided to assist in allocating the buffer.
- */
-#define PNG_IMAGE_DATA_SIZE(image) (PNG_IMAGE_SIZE(image)+(image).height)
-   /* The number of uncompressed bytes in the PNG byte encoding of the image;
-    * uncompressing the PNG IDAT data will give this number of bytes.
-    *
-    * NOTE: while PNG_IMAGE_SIZE cannot overflow for an image in memory this
-    * macro can because of the extra bytes used in the PNG byte encoding.  You
-    * need to avoid this macro if your image size approaches 2^30 in width or
-    * height.  The same goes for the remainder of these macros; they all produce
-    * bigger numbers than the actual in-memory image size.
-    */
-#ifndef PNG_ZLIB_MAX_SIZE
-#  define PNG_ZLIB_MAX_SIZE(b) ((b)+(((b)+7U)>>3)+(((b)+63U)>>6)+11U)
-   /* An upper bound on the number of compressed bytes given 'b' uncompressed
-    * bytes.  This is based on deflateBounds() in zlib; different
-    * implementations of zlib compression may conceivably produce more data so
-    * if your zlib implementation is not zlib itself redefine this macro
-    * appropriately.
-    */
-#endif
-
-#define PNG_IMAGE_COMPRESSED_SIZE_MAX(image)\
-   PNG_ZLIB_MAX_SIZE((png_alloc_size_t)PNG_IMAGE_DATA_SIZE(image))
-   /* An upper bound on the size of the data in the PNG IDAT chunks. */
-
-#define PNG_IMAGE_PNG_SIZE_MAX_(image, image_size)\
-   ((8U/*sig*/+25U/*IHDR*/+16U/*gAMA*/+44U/*cHRM*/+12U/*IEND*/+\
-    (((image).format&PNG_FORMAT_FLAG_COLORMAP)?/*colormap: PLTE, tRNS*/\
-    12U+3U*(image).colormap_entries/*PLTE data*/+\
-    (((image).format&PNG_FORMAT_FLAG_ALPHA)?\
-    12U/*tRNS*/+(image).colormap_entries:0U):0U)+\
-    12U)+(12U*((image_size)/PNG_ZBUF_SIZE))/*IDAT*/+(image_size))
-   /* A helper for the following macro; if your compiler cannot handle the
-    * following macro use this one with the result of
-    * PNG_IMAGE_COMPRESSED_SIZE_MAX(image) as the second argument (most
-    * compilers should handle this just fine.)
-    */
-
-#define PNG_IMAGE_PNG_SIZE_MAX(image)\
-   PNG_IMAGE_PNG_SIZE_MAX_(image, PNG_IMAGE_COMPRESSED_SIZE_MAX(image))
-   /* An upper bound on the total length of the PNG data stream for 'image'.
-    * The result is of type png_alloc_size_t, on 32-bit systems this may
-    * overflow even though PNG_IMAGE_DATA_SIZE does not overflow; the write will
-    * run out of buffer space but return a corrected size which should work.
-    */
-#endif /* SIMPLIFIED_WRITE */
-/*******************************************************************************
- *  END OF SIMPLIFIED API
- ******************************************************************************/
-#endif /* SIMPLIFIED_{READ|WRITE} */
-
-/*******************************************************************************
- * Section 6: IMPLEMENTATION OPTIONS
- *******************************************************************************
- *
- * Support for arbitrary implementation-specific optimizations.  The API allows
- * particular options to be turned on or off.  'Option' is the number of the
- * option and 'onoff' is 0 (off) or non-0 (on).  The value returned is given
- * by the PNG_OPTION_ defines below.
- *
- * HARDWARE: normally hardware capabilities, such as the Intel SSE instructions,
- *           are detected at run time, however sometimes it may be impossible
- *           to do this in user mode, in which case it is necessary to discover
- *           the capabilities in an OS specific way.  Such capabilities are
- *           listed here when libpng has support for them and must be turned
- *           ON by the application if present.
- *
- * SOFTWARE: sometimes software optimizations actually result in performance
- *           decrease on some architectures or systems, or with some sets of
- *           PNG images.  'Software' options allow such optimizations to be
- *           selected at run time.
- */
-#ifdef PNG_SET_OPTION_SUPPORTED
-#ifdef PNG_ARM_NEON_API_SUPPORTED
-#  define PNG_ARM_NEON   0 /* HARDWARE: ARM Neon SIMD instructions supported */
-#endif
-#define PNG_MAXIMUM_INFLATE_WINDOW 2 /* SOFTWARE: force maximum window */
-#define PNG_SKIP_sRGB_CHECK_PROFILE 4 /* SOFTWARE: Check ICC profile for sRGB */
-#ifdef PNG_MIPS_MSA_API_SUPPORTED
-#  define PNG_MIPS_MSA   6 /* HARDWARE: MIPS Msa SIMD instructions supported */
-#endif
-#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED
-#  define PNG_IGNORE_ADLER32 8 /* SOFTWARE: disable Adler32 check on IDAT */
-#endif
-#ifdef PNG_POWERPC_VSX_API_SUPPORTED
-#  define PNG_POWERPC_VSX   10 /* HARDWARE: PowerPC VSX SIMD instructions
-                                * supported */
-#endif
-#ifdef PNG_MIPS_MMI_API_SUPPORTED
-#  define PNG_MIPS_MMI   12 /* HARDWARE: MIPS MMI SIMD instructions supported */
-#endif
-
-#define PNG_OPTION_NEXT  14 /* Next option - numbers must be even */
-
-/* Return values: NOTE: there are four values and 'off' is *not* zero */
-#define PNG_OPTION_UNSET   0 /* Unset - defaults to off */
-#define PNG_OPTION_INVALID 1 /* Option number out of range */
-#define PNG_OPTION_OFF     2
-#define PNG_OPTION_ON      3
-
-PNG_EXPORT(244, int, png_set_option, (png_structrp png_ptr, int option,
-   int onoff));
-#endif /* SET_OPTION */
-
-/*******************************************************************************
- *  END OF HARDWARE AND SOFTWARE OPTIONS
- ******************************************************************************/
-
-/* Maintainer: Put new public prototypes here ^, in libpng.3, in project
- * defs, and in scripts/symbols.def.
- */
-
-/* The last ordinal number (this is the *last* one already used; the next
- * one to use is one more than this.)
- */
-#ifdef PNG_EXPORT_LAST_ORDINAL
-  PNG_EXPORT_LAST_ORDINAL(249);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* PNG_VERSION_INFO_ONLY */
-/* Do not put anything past this line */
-#endif /* PNG_H */
diff --git a/dep/libpng/include/pngconf.h b/dep/libpng/include/pngconf.h
deleted file mode 100644
index 000d7b1a8..000000000
--- a/dep/libpng/include/pngconf.h
+++ /dev/null
@@ -1,623 +0,0 @@
-
-/* pngconf.h - machine-configurable file for libpng
- *
- * libpng version 1.6.43
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * Any machine specific code is near the front of this file, so if you
- * are configuring libpng for a machine, you may want to read the section
- * starting here down to where it starts to typedef png_color, png_text,
- * and png_info.
- */
-
-#ifndef PNGCONF_H
-#define PNGCONF_H
-
-#ifndef PNG_BUILDING_SYMBOL_TABLE /* else includes may cause problems */
-
-/* From libpng 1.6.0 libpng requires an ANSI X3.159-1989 ("ISOC90") compliant C
- * compiler for correct compilation.  The following header files are required by
- * the standard.  If your compiler doesn't provide these header files, or they
- * do not match the standard, you will need to provide/improve them.
- */
-#include <limits.h>
-#include <stddef.h>
-
-/* Library header files.  These header files are all defined by ISOC90; libpng
- * expects conformant implementations, however, an ISOC90 conformant system need
- * not provide these header files if the functionality cannot be implemented.
- * In this case it will be necessary to disable the relevant parts of libpng in
- * the build of pnglibconf.h.
- *
- * Prior to 1.6.0 string.h was included here; the API changes in 1.6.0 to not
- * include this unnecessary header file.
- */
-
-#ifdef PNG_STDIO_SUPPORTED
-   /* Required for the definition of FILE: */
-#  include <stdio.h>
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   /* Required for the definition of jmp_buf and the declaration of longjmp: */
-#  include <setjmp.h>
-#endif
-
-#ifdef PNG_CONVERT_tIME_SUPPORTED
-   /* Required for struct tm: */
-#  include <time.h>
-#endif
-
-#endif /* PNG_BUILDING_SYMBOL_TABLE */
-
-/* Prior to 1.6.0, it was possible to turn off 'const' in declarations,
- * using PNG_NO_CONST.  This is no longer supported.
- */
-#define PNG_CONST const /* backward compatibility only */
-
-/* This controls optimization of the reading of 16-bit and 32-bit
- * values from PNG files.  It can be set on a per-app-file basis: it
- * just changes whether a macro is used when the function is called.
- * The library builder sets the default; if read functions are not
- * built into the library the macro implementation is forced on.
- */
-#ifndef PNG_READ_INT_FUNCTIONS_SUPPORTED
-#  define PNG_USE_READ_MACROS
-#endif
-#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS)
-#  if PNG_DEFAULT_READ_MACROS
-#    define PNG_USE_READ_MACROS
-#  endif
-#endif
-
-/* COMPILER SPECIFIC OPTIONS.
- *
- * These options are provided so that a variety of difficult compilers
- * can be used.  Some are fixed at build time (e.g. PNG_API_RULE
- * below) but still have compiler specific implementations, others
- * may be changed on a per-file basis when compiling against libpng.
- */
-
-/* The PNGARG macro was used in versions of libpng prior to 1.6.0 to protect
- * against legacy (pre ISOC90) compilers that did not understand function
- * prototypes.  It is not required for modern C compilers.
- */
-#ifndef PNGARG
-#  define PNGARG(arglist) arglist
-#endif
-
-/* Function calling conventions.
- * =============================
- * Normally it is not necessary to specify to the compiler how to call
- * a function - it just does it - however on x86 systems derived from
- * Microsoft and Borland C compilers ('IBM PC', 'DOS', 'Windows' systems
- * and some others) there are multiple ways to call a function and the
- * default can be changed on the compiler command line.  For this reason
- * libpng specifies the calling convention of every exported function and
- * every function called via a user supplied function pointer.  This is
- * done in this file by defining the following macros:
- *
- * PNGAPI    Calling convention for exported functions.
- * PNGCBAPI  Calling convention for user provided (callback) functions.
- * PNGCAPI   Calling convention used by the ANSI-C library (required
- *           for longjmp callbacks and sometimes used internally to
- *           specify the calling convention for zlib).
- *
- * These macros should never be overridden.  If it is necessary to
- * change calling convention in a private build this can be done
- * by setting PNG_API_RULE (which defaults to 0) to one of the values
- * below to select the correct 'API' variants.
- *
- * PNG_API_RULE=0 Use PNGCAPI - the 'C' calling convention - throughout.
- *                This is correct in every known environment.
- * PNG_API_RULE=1 Use the operating system convention for PNGAPI and
- *                the 'C' calling convention (from PNGCAPI) for
- *                callbacks (PNGCBAPI).  This is no longer required
- *                in any known environment - if it has to be used
- *                please post an explanation of the problem to the
- *                libpng mailing list.
- *
- * These cases only differ if the operating system does not use the C
- * calling convention, at present this just means the above cases
- * (x86 DOS/Windows systems) and, even then, this does not apply to
- * Cygwin running on those systems.
- *
- * Note that the value must be defined in pnglibconf.h so that what
- * the application uses to call the library matches the conventions
- * set when building the library.
- */
-
-/* Symbol export
- * =============
- * When building a shared library it is almost always necessary to tell
- * the compiler which symbols to export.  The png.h macro 'PNG_EXPORT'
- * is used to mark the symbols.  On some systems these symbols can be
- * extracted at link time and need no special processing by the compiler,
- * on other systems the symbols are flagged by the compiler and just
- * the declaration requires a special tag applied (unfortunately) in a
- * compiler dependent way.  Some systems can do either.
- *
- * A small number of older systems also require a symbol from a DLL to
- * be flagged to the program that calls it.  This is a problem because
- * we do not know in the header file included by application code that
- * the symbol will come from a shared library, as opposed to a statically
- * linked one.  For this reason the application must tell us by setting
- * the magic flag PNG_USE_DLL to turn on the special processing before
- * it includes png.h.
- *
- * Four additional macros are used to make this happen:
- *
- * PNG_IMPEXP The magic (if any) to cause a symbol to be exported from
- *            the build or imported if PNG_USE_DLL is set - compiler
- *            and system specific.
- *
- * PNG_EXPORT_TYPE(type) A macro that pre or appends PNG_IMPEXP to
- *                       'type', compiler specific.
- *
- * PNG_DLL_EXPORT Set to the magic to use during a libpng build to
- *                make a symbol exported from the DLL.  Not used in the
- *                public header files; see pngpriv.h for how it is used
- *                in the libpng build.
- *
- * PNG_DLL_IMPORT Set to the magic to force the libpng symbols to come
- *                from a DLL - used to define PNG_IMPEXP when
- *                PNG_USE_DLL is set.
- */
-
-/* System specific discovery.
- * ==========================
- * This code is used at build time to find PNG_IMPEXP, the API settings
- * and PNG_EXPORT_TYPE(), it may also set a macro to indicate the DLL
- * import processing is possible.  On Windows systems it also sets
- * compiler-specific macros to the values required to change the calling
- * conventions of the various functions.
- */
-#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__) || \
-    defined(__CYGWIN__)
-  /* Windows system (DOS doesn't support DLLs).  Includes builds under Cygwin or
-   * MinGW on any architecture currently supported by Windows.  Also includes
-   * Watcom builds but these need special treatment because they are not
-   * compatible with GCC or Visual C because of different calling conventions.
-   */
-#  if PNG_API_RULE == 2
-   /* If this line results in an error, either because __watcall is not
-    * understood or because of a redefine just below you cannot use *this*
-    * build of the library with the compiler you are using.  *This* build was
-    * build using Watcom and applications must also be built using Watcom!
-    */
-#    define PNGCAPI __watcall
-#  endif
-
-#  if defined(__GNUC__) || (defined(_MSC_VER) && (_MSC_VER >= 800))
-#    define PNGCAPI __cdecl
-#    if PNG_API_RULE == 1
-   /* If this line results in an error __stdcall is not understood and
-    * PNG_API_RULE should not have been set to '1'.
-    */
-#      define PNGAPI __stdcall
-#    endif
-#  else
-   /* An older compiler, or one not detected (erroneously) above,
-    * if necessary override on the command line to get the correct
-    * variants for the compiler.
-    */
-#    ifndef PNGCAPI
-#      define PNGCAPI _cdecl
-#    endif
-#    if PNG_API_RULE == 1 && !defined(PNGAPI)
-#      define PNGAPI _stdcall
-#    endif
-#  endif /* compiler/api */
-
-  /* NOTE: PNGCBAPI always defaults to PNGCAPI. */
-
-#  if defined(PNGAPI) && !defined(PNG_USER_PRIVATEBUILD)
-#     error "PNG_USER_PRIVATEBUILD must be defined if PNGAPI is changed"
-#  endif
-
-#  if (defined(_MSC_VER) && _MSC_VER < 800) ||\
-      (defined(__BORLANDC__) && __BORLANDC__ < 0x500)
-   /* older Borland and MSC
-    * compilers used '__export' and required this to be after
-    * the type.
-    */
-#    ifndef PNG_EXPORT_TYPE
-#      define PNG_EXPORT_TYPE(type) type PNG_IMPEXP
-#    endif
-#    define PNG_DLL_EXPORT __export
-#  else /* newer compiler */
-#    define PNG_DLL_EXPORT __declspec(dllexport)
-#    ifndef PNG_DLL_IMPORT
-#      define PNG_DLL_IMPORT __declspec(dllimport)
-#    endif
-#  endif /* compiler */
-
-#else /* !Windows */
-#  if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
-#    define PNGAPI _System
-#  else /* !Windows/x86 && !OS/2 */
-   /* Use the defaults, or define PNG*API on the command line (but
-    * this will have to be done for every compile!)
-    */
-#  endif /* other system, !OS/2 */
-#endif /* !Windows/x86 */
-
-/* Now do all the defaulting . */
-#ifndef PNGCAPI
-#  define PNGCAPI
-#endif
-#ifndef PNGCBAPI
-#  define PNGCBAPI PNGCAPI
-#endif
-#ifndef PNGAPI
-#  define PNGAPI PNGCAPI
-#endif
-
-/* PNG_IMPEXP may be set on the compilation system command line or (if not set)
- * then in an internal header file when building the library, otherwise (when
- * using the library) it is set here.
- */
-#ifndef PNG_IMPEXP
-#  if defined(PNG_USE_DLL) && defined(PNG_DLL_IMPORT)
-   /* This forces use of a DLL, disallowing static linking */
-#    define PNG_IMPEXP PNG_DLL_IMPORT
-#  endif
-
-#  ifndef PNG_IMPEXP
-#    define PNG_IMPEXP
-#  endif
-#endif
-
-/* In 1.5.2 the definition of PNG_FUNCTION has been changed to always treat
- * 'attributes' as a storage class - the attributes go at the start of the
- * function definition, and attributes are always appended regardless of the
- * compiler.  This considerably simplifies these macros but may cause problems
- * if any compilers both need function attributes and fail to handle them as
- * a storage class (this is unlikely.)
- */
-#ifndef PNG_FUNCTION
-#  define PNG_FUNCTION(type, name, args, attributes) attributes type name args
-#endif
-
-#ifndef PNG_EXPORT_TYPE
-#  define PNG_EXPORT_TYPE(type) PNG_IMPEXP type
-#endif
-
-   /* The ordinal value is only relevant when preprocessing png.h for symbol
-    * table entries, so we discard it here.  See the .dfn files in the
-    * scripts directory.
-    */
-
-#ifndef PNG_EXPORTA
-#  define PNG_EXPORTA(ordinal, type, name, args, attributes) \
-      PNG_FUNCTION(PNG_EXPORT_TYPE(type), (PNGAPI name), PNGARG(args), \
-      PNG_LINKAGE_API attributes)
-#endif
-
-/* ANSI-C (C90) does not permit a macro to be invoked with an empty argument,
- * so make something non-empty to satisfy the requirement:
- */
-#define PNG_EMPTY /*empty list*/
-
-#define PNG_EXPORT(ordinal, type, name, args) \
-   PNG_EXPORTA(ordinal, type, name, args, PNG_EMPTY)
-
-/* Use PNG_REMOVED to comment out a removed interface. */
-#ifndef PNG_REMOVED
-#  define PNG_REMOVED(ordinal, type, name, args, attributes)
-#endif
-
-#ifndef PNG_CALLBACK
-#  define PNG_CALLBACK(type, name, args) type (PNGCBAPI name) PNGARG(args)
-#endif
-
-/* Support for compiler specific function attributes.  These are used
- * so that where compiler support is available incorrect use of API
- * functions in png.h will generate compiler warnings.
- *
- * Added at libpng-1.2.41.
- */
-
-#ifndef PNG_NO_PEDANTIC_WARNINGS
-#  ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED
-#    define PNG_PEDANTIC_WARNINGS_SUPPORTED
-#  endif
-#endif
-
-#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED
-  /* Support for compiler specific function attributes.  These are used
-   * so that where compiler support is available, incorrect use of API
-   * functions in png.h will generate compiler warnings.  Added at libpng
-   * version 1.2.41.  Disabling these removes the warnings but may also produce
-   * less efficient code.
-   */
-#  if defined(__clang__) && defined(__has_attribute)
-   /* Clang defines both __clang__ and __GNUC__. Check __clang__ first. */
-#    if !defined(PNG_USE_RESULT) && __has_attribute(__warn_unused_result__)
-#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
-#    endif
-#    if !defined(PNG_NORETURN) && __has_attribute(__noreturn__)
-#      define PNG_NORETURN __attribute__((__noreturn__))
-#    endif
-#    if !defined(PNG_ALLOCATED) && __has_attribute(__malloc__)
-#      define PNG_ALLOCATED __attribute__((__malloc__))
-#    endif
-#    if !defined(PNG_DEPRECATED) && __has_attribute(__deprecated__)
-#      define PNG_DEPRECATED __attribute__((__deprecated__))
-#    endif
-#    if !defined(PNG_PRIVATE)
-#      ifdef __has_extension
-#        if __has_extension(attribute_unavailable_with_message)
-#          define PNG_PRIVATE __attribute__((__unavailable__(\
-             "This function is not exported by libpng.")))
-#        endif
-#      endif
-#    endif
-#    ifndef PNG_RESTRICT
-#      define PNG_RESTRICT __restrict
-#    endif
-
-#  elif defined(__GNUC__)
-#    ifndef PNG_USE_RESULT
-#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
-#    endif
-#    ifndef PNG_NORETURN
-#      define PNG_NORETURN   __attribute__((__noreturn__))
-#    endif
-#    if __GNUC__ >= 3
-#      ifndef PNG_ALLOCATED
-#        define PNG_ALLOCATED  __attribute__((__malloc__))
-#      endif
-#      ifndef PNG_DEPRECATED
-#        define PNG_DEPRECATED __attribute__((__deprecated__))
-#      endif
-#      ifndef PNG_PRIVATE
-#        if 0 /* Doesn't work so we use deprecated instead*/
-#          define PNG_PRIVATE \
-            __attribute__((warning("This function is not exported by libpng.")))
-#        else
-#          define PNG_PRIVATE \
-            __attribute__((__deprecated__))
-#        endif
-#      endif
-#      if ((__GNUC__ > 3) || !defined(__GNUC_MINOR__) || (__GNUC_MINOR__ >= 1))
-#        ifndef PNG_RESTRICT
-#          define PNG_RESTRICT __restrict
-#        endif
-#      endif /* __GNUC__.__GNUC_MINOR__ > 3.0 */
-#    endif /* __GNUC__ >= 3 */
-
-#  elif defined(_MSC_VER)  && (_MSC_VER >= 1300)
-#    ifndef PNG_USE_RESULT
-#      define PNG_USE_RESULT /* not supported */
-#    endif
-#    ifndef PNG_NORETURN
-#      define PNG_NORETURN   __declspec(noreturn)
-#    endif
-#    ifndef PNG_ALLOCATED
-#      if (_MSC_VER >= 1400)
-#        define PNG_ALLOCATED __declspec(restrict)
-#      endif
-#    endif
-#    ifndef PNG_DEPRECATED
-#      define PNG_DEPRECATED __declspec(deprecated)
-#    endif
-#    ifndef PNG_PRIVATE
-#      define PNG_PRIVATE __declspec(deprecated)
-#    endif
-#    ifndef PNG_RESTRICT
-#      if (_MSC_VER >= 1400)
-#        define PNG_RESTRICT __restrict
-#      endif
-#    endif
-
-#  elif defined(__WATCOMC__)
-#    ifndef PNG_RESTRICT
-#      define PNG_RESTRICT __restrict
-#    endif
-#  endif
-#endif /* PNG_PEDANTIC_WARNINGS */
-
-#ifndef PNG_DEPRECATED
-#  define PNG_DEPRECATED  /* Use of this function is deprecated */
-#endif
-#ifndef PNG_USE_RESULT
-#  define PNG_USE_RESULT  /* The result of this function must be checked */
-#endif
-#ifndef PNG_NORETURN
-#  define PNG_NORETURN    /* This function does not return */
-#endif
-#ifndef PNG_ALLOCATED
-#  define PNG_ALLOCATED   /* The result of the function is new memory */
-#endif
-#ifndef PNG_PRIVATE
-#  define PNG_PRIVATE     /* This is a private libpng function */
-#endif
-#ifndef PNG_RESTRICT
-#  define PNG_RESTRICT    /* The C99 "restrict" feature */
-#endif
-
-#ifndef PNG_FP_EXPORT     /* A floating point API. */
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-#     define PNG_FP_EXPORT(ordinal, type, name, args)\
-         PNG_EXPORT(ordinal, type, name, args);
-#  else                   /* No floating point APIs */
-#     define PNG_FP_EXPORT(ordinal, type, name, args)
-#  endif
-#endif
-#ifndef PNG_FIXED_EXPORT  /* A fixed point API. */
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-#     define PNG_FIXED_EXPORT(ordinal, type, name, args)\
-         PNG_EXPORT(ordinal, type, name, args);
-#  else                   /* No fixed point APIs */
-#     define PNG_FIXED_EXPORT(ordinal, type, name, args)
-#  endif
-#endif
-
-#ifndef PNG_BUILDING_SYMBOL_TABLE
-/* Some typedefs to get us started.  These should be safe on most of the common
- * platforms.
- *
- * png_uint_32 and png_int_32 may, currently, be larger than required to hold a
- * 32-bit value however this is not normally advisable.
- *
- * png_uint_16 and png_int_16 should always be two bytes in size - this is
- * verified at library build time.
- *
- * png_byte must always be one byte in size.
- *
- * The checks below use constants from limits.h, as defined by the ISOC90
- * standard.
- */
-#if CHAR_BIT == 8 && UCHAR_MAX == 255
-   typedef unsigned char png_byte;
-#else
-#  error "libpng requires 8-bit bytes"
-#endif
-
-#if INT_MIN == -32768 && INT_MAX == 32767
-   typedef int png_int_16;
-#elif SHRT_MIN == -32768 && SHRT_MAX == 32767
-   typedef short png_int_16;
-#else
-#  error "libpng requires a signed 16-bit type"
-#endif
-
-#if UINT_MAX == 65535
-   typedef unsigned int png_uint_16;
-#elif USHRT_MAX == 65535
-   typedef unsigned short png_uint_16;
-#else
-#  error "libpng requires an unsigned 16-bit type"
-#endif
-
-#if INT_MIN < -2147483646 && INT_MAX > 2147483646
-   typedef int png_int_32;
-#elif LONG_MIN < -2147483646 && LONG_MAX > 2147483646
-   typedef long int png_int_32;
-#else
-#  error "libpng requires a signed 32-bit (or more) type"
-#endif
-
-#if UINT_MAX > 4294967294U
-   typedef unsigned int png_uint_32;
-#elif ULONG_MAX > 4294967294U
-   typedef unsigned long int png_uint_32;
-#else
-#  error "libpng requires an unsigned 32-bit (or more) type"
-#endif
-
-/* Prior to 1.6.0, it was possible to disable the use of size_t and ptrdiff_t.
- * From 1.6.0 onwards, an ISO C90 compiler, as well as a standard-compliant
- * behavior of sizeof and ptrdiff_t are required.
- * The legacy typedefs are provided here for backwards compatibility.
- */
-typedef size_t png_size_t;
-typedef ptrdiff_t png_ptrdiff_t;
-
-/* libpng needs to know the maximum value of 'size_t' and this controls the
- * definition of png_alloc_size_t, below.  This maximum value of size_t limits
- * but does not control the maximum allocations the library makes - there is
- * direct application control of this through png_set_user_limits().
- */
-#ifndef PNG_SMALL_SIZE_T
-   /* Compiler specific tests for systems where size_t is known to be less than
-    * 32 bits (some of these systems may no longer work because of the lack of
-    * 'far' support; see above.)
-    */
-#  if (defined(__TURBOC__) && !defined(__FLAT__)) ||\
-   (defined(_MSC_VER) && defined(MAXSEG_64K))
-#     define PNG_SMALL_SIZE_T
-#  endif
-#endif
-
-/* png_alloc_size_t is guaranteed to be no smaller than size_t, and no smaller
- * than png_uint_32.  Casts from size_t or png_uint_32 to png_alloc_size_t are
- * not necessary; in fact, it is recommended not to use them at all, so that
- * the compiler can complain when something turns out to be problematic.
- *
- * Casts in the other direction (from png_alloc_size_t to size_t or
- * png_uint_32) should be explicitly applied; however, we do not expect to
- * encounter practical situations that require such conversions.
- *
- * PNG_SMALL_SIZE_T must be defined if the maximum value of size_t is less than
- * 4294967295 - i.e. less than the maximum value of png_uint_32.
- */
-#ifdef PNG_SMALL_SIZE_T
-   typedef png_uint_32 png_alloc_size_t;
-#else
-   typedef size_t png_alloc_size_t;
-#endif
-
-/* Prior to 1.6.0 libpng offered limited support for Microsoft C compiler
- * implementations of Intel CPU specific support of user-mode segmented address
- * spaces, where 16-bit pointers address more than 65536 bytes of memory using
- * separate 'segment' registers.  The implementation requires two different
- * types of pointer (only one of which includes the segment value.)
- *
- * If required this support is available in version 1.2 of libpng and may be
- * available in versions through 1.5, although the correctness of the code has
- * not been verified recently.
- */
-
-/* Typedef for floating-point numbers that are converted to fixed-point with a
- * multiple of 100,000, e.g., gamma
- */
-typedef png_int_32 png_fixed_point;
-
-/* Add typedefs for pointers */
-typedef void                  * png_voidp;
-typedef const void            * png_const_voidp;
-typedef png_byte              * png_bytep;
-typedef const png_byte        * png_const_bytep;
-typedef png_uint_32           * png_uint_32p;
-typedef const png_uint_32     * png_const_uint_32p;
-typedef png_int_32            * png_int_32p;
-typedef const png_int_32      * png_const_int_32p;
-typedef png_uint_16           * png_uint_16p;
-typedef const png_uint_16     * png_const_uint_16p;
-typedef png_int_16            * png_int_16p;
-typedef const png_int_16      * png_const_int_16p;
-typedef char                  * png_charp;
-typedef const char            * png_const_charp;
-typedef png_fixed_point       * png_fixed_point_p;
-typedef const png_fixed_point * png_const_fixed_point_p;
-typedef size_t                * png_size_tp;
-typedef const size_t          * png_const_size_tp;
-
-#ifdef PNG_STDIO_SUPPORTED
-typedef FILE            * png_FILE_p;
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-typedef double       * png_doublep;
-typedef const double * png_const_doublep;
-#endif
-
-/* Pointers to pointers; i.e. arrays */
-typedef png_byte        * * png_bytepp;
-typedef png_uint_32     * * png_uint_32pp;
-typedef png_int_32      * * png_int_32pp;
-typedef png_uint_16     * * png_uint_16pp;
-typedef png_int_16      * * png_int_16pp;
-typedef const char      * * png_const_charpp;
-typedef char            * * png_charpp;
-typedef png_fixed_point * * png_fixed_point_pp;
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-typedef double          * * png_doublepp;
-#endif
-
-/* Pointers to pointers to pointers; i.e., pointer to array */
-typedef char            * * * png_charppp;
-
-#endif /* PNG_BUILDING_SYMBOL_TABLE */
-
-#endif /* PNGCONF_H */
diff --git a/dep/libpng/include/pnglibconf.h b/dep/libpng/include/pnglibconf.h
deleted file mode 100644
index 83f09fbe7..000000000
--- a/dep/libpng/include/pnglibconf.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* pnglibconf.h - library build configuration */
-
-/* libpng version 1.6.43 */
-
-/* Copyright (c) 2018-2024 Cosmin Truta */
-/* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson */
-
-/* This code is released under the libpng license. */
-/* For conditions of distribution and use, see the disclaimer */
-/* and license in png.h */
-
-/* pnglibconf.h */
-/* Machine generated file: DO NOT EDIT */
-/* Derived from: scripts/pnglibconf.dfa */
-#ifndef PNGLCONF_H
-#define PNGLCONF_H
-/* options */
-#define PNG_16BIT_SUPPORTED
-#define PNG_ALIGNED_MEMORY_SUPPORTED
-/*#undef PNG_ARM_NEON_API_SUPPORTED*/
-/*#undef PNG_ARM_NEON_CHECK_SUPPORTED*/
-#define PNG_BENIGN_ERRORS_SUPPORTED
-#define PNG_BENIGN_READ_ERRORS_SUPPORTED
-/*#undef PNG_BENIGN_WRITE_ERRORS_SUPPORTED*/
-#define PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-#define PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_COLORSPACE_SUPPORTED
-#define PNG_CONSOLE_IO_SUPPORTED
-#define PNG_CONVERT_tIME_SUPPORTED
-/*#undef PNG_DISABLE_ADLER32_CHECK_SUPPORTED*/
-#define PNG_EASY_ACCESS_SUPPORTED
-/*#undef PNG_ERROR_NUMBERS_SUPPORTED*/
-#define PNG_ERROR_TEXT_SUPPORTED
-#define PNG_FIXED_POINT_SUPPORTED
-#define PNG_FLOATING_ARITHMETIC_SUPPORTED
-#define PNG_FLOATING_POINT_SUPPORTED
-#define PNG_FORMAT_AFIRST_SUPPORTED
-#define PNG_FORMAT_BGR_SUPPORTED
-#define PNG_GAMMA_SUPPORTED
-#define PNG_GET_PALETTE_MAX_SUPPORTED
-#define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-#define PNG_INCH_CONVERSIONS_SUPPORTED
-#define PNG_INFO_IMAGE_SUPPORTED
-#define PNG_IO_STATE_SUPPORTED
-/*#undef PNG_MIPS_MMI_API_SUPPORTED*/
-/*#undef PNG_MIPS_MMI_CHECK_SUPPORTED*/
-/*#undef PNG_MIPS_MSA_API_SUPPORTED*/
-/*#undef PNG_MIPS_MSA_CHECK_SUPPORTED*/
-#define PNG_MNG_FEATURES_SUPPORTED
-#define PNG_POINTER_INDEXING_SUPPORTED
-/*#undef PNG_POWERPC_VSX_API_SUPPORTED*/
-/*#undef PNG_POWERPC_VSX_CHECK_SUPPORTED*/
-#define PNG_PROGRESSIVE_READ_SUPPORTED
-#define PNG_READ_16BIT_SUPPORTED
-#define PNG_READ_ALPHA_MODE_SUPPORTED
-#define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
-#define PNG_READ_BACKGROUND_SUPPORTED
-#define PNG_READ_BGR_SUPPORTED
-#define PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_READ_COMPOSITE_NODIV_SUPPORTED
-#define PNG_READ_COMPRESSED_TEXT_SUPPORTED
-#define PNG_READ_EXPAND_16_SUPPORTED
-#define PNG_READ_EXPAND_SUPPORTED
-#define PNG_READ_FILLER_SUPPORTED
-#define PNG_READ_GAMMA_SUPPORTED
-#define PNG_READ_GET_PALETTE_MAX_SUPPORTED
-#define PNG_READ_GRAY_TO_RGB_SUPPORTED
-#define PNG_READ_INTERLACING_SUPPORTED
-#define PNG_READ_INT_FUNCTIONS_SUPPORTED
-#define PNG_READ_INVERT_ALPHA_SUPPORTED
-#define PNG_READ_INVERT_SUPPORTED
-#define PNG_READ_OPT_PLTE_SUPPORTED
-#define PNG_READ_PACKSWAP_SUPPORTED
-#define PNG_READ_PACK_SUPPORTED
-#define PNG_READ_QUANTIZE_SUPPORTED
-#define PNG_READ_RGB_TO_GRAY_SUPPORTED
-#define PNG_READ_SCALE_16_TO_8_SUPPORTED
-#define PNG_READ_SHIFT_SUPPORTED
-#define PNG_READ_STRIP_16_TO_8_SUPPORTED
-#define PNG_READ_STRIP_ALPHA_SUPPORTED
-#define PNG_READ_SUPPORTED
-#define PNG_READ_SWAP_ALPHA_SUPPORTED
-#define PNG_READ_SWAP_SUPPORTED
-#define PNG_READ_TEXT_SUPPORTED
-#define PNG_READ_TRANSFORMS_SUPPORTED
-#define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_READ_USER_CHUNKS_SUPPORTED
-#define PNG_READ_USER_TRANSFORM_SUPPORTED
-#define PNG_READ_bKGD_SUPPORTED
-#define PNG_READ_cHRM_SUPPORTED
-#define PNG_READ_eXIf_SUPPORTED
-#define PNG_READ_gAMA_SUPPORTED
-#define PNG_READ_hIST_SUPPORTED
-#define PNG_READ_iCCP_SUPPORTED
-#define PNG_READ_iTXt_SUPPORTED
-#define PNG_READ_oFFs_SUPPORTED
-#define PNG_READ_pCAL_SUPPORTED
-#define PNG_READ_pHYs_SUPPORTED
-#define PNG_READ_sBIT_SUPPORTED
-#define PNG_READ_sCAL_SUPPORTED
-#define PNG_READ_sPLT_SUPPORTED
-#define PNG_READ_sRGB_SUPPORTED
-#define PNG_READ_tEXt_SUPPORTED
-#define PNG_READ_tIME_SUPPORTED
-#define PNG_READ_tRNS_SUPPORTED
-#define PNG_READ_zTXt_SUPPORTED
-#define PNG_SAVE_INT_32_SUPPORTED
-#define PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_SEQUENTIAL_READ_SUPPORTED
-#define PNG_SETJMP_SUPPORTED
-#define PNG_SET_OPTION_SUPPORTED
-#define PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_SET_USER_LIMITS_SUPPORTED
-#define PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED
-#define PNG_SIMPLIFIED_READ_BGR_SUPPORTED
-#define PNG_SIMPLIFIED_READ_SUPPORTED
-#define PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
-#define PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED
-#define PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
-#define PNG_SIMPLIFIED_WRITE_SUPPORTED
-#define PNG_STDIO_SUPPORTED
-#define PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_TEXT_SUPPORTED
-#define PNG_TIME_RFC1123_SUPPORTED
-#define PNG_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_USER_CHUNKS_SUPPORTED
-#define PNG_USER_LIMITS_SUPPORTED
-#define PNG_USER_MEM_SUPPORTED
-#define PNG_USER_TRANSFORM_INFO_SUPPORTED
-#define PNG_USER_TRANSFORM_PTR_SUPPORTED
-#define PNG_WARNINGS_SUPPORTED
-#define PNG_WRITE_16BIT_SUPPORTED
-#define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
-#define PNG_WRITE_BGR_SUPPORTED
-#define PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-#define PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
-#define PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-#define PNG_WRITE_FILLER_SUPPORTED
-#define PNG_WRITE_FILTER_SUPPORTED
-#define PNG_WRITE_FLUSH_SUPPORTED
-#define PNG_WRITE_GET_PALETTE_MAX_SUPPORTED
-#define PNG_WRITE_INTERLACING_SUPPORTED
-#define PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-#define PNG_WRITE_INVERT_ALPHA_SUPPORTED
-#define PNG_WRITE_INVERT_SUPPORTED
-#define PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-#define PNG_WRITE_PACKSWAP_SUPPORTED
-#define PNG_WRITE_PACK_SUPPORTED
-#define PNG_WRITE_SHIFT_SUPPORTED
-#define PNG_WRITE_SUPPORTED
-#define PNG_WRITE_SWAP_ALPHA_SUPPORTED
-#define PNG_WRITE_SWAP_SUPPORTED
-#define PNG_WRITE_TEXT_SUPPORTED
-#define PNG_WRITE_TRANSFORMS_SUPPORTED
-#define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-#define PNG_WRITE_USER_TRANSFORM_SUPPORTED
-#define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-#define PNG_WRITE_bKGD_SUPPORTED
-#define PNG_WRITE_cHRM_SUPPORTED
-#define PNG_WRITE_eXIf_SUPPORTED
-#define PNG_WRITE_gAMA_SUPPORTED
-#define PNG_WRITE_hIST_SUPPORTED
-#define PNG_WRITE_iCCP_SUPPORTED
-#define PNG_WRITE_iTXt_SUPPORTED
-#define PNG_WRITE_oFFs_SUPPORTED
-#define PNG_WRITE_pCAL_SUPPORTED
-#define PNG_WRITE_pHYs_SUPPORTED
-#define PNG_WRITE_sBIT_SUPPORTED
-#define PNG_WRITE_sCAL_SUPPORTED
-#define PNG_WRITE_sPLT_SUPPORTED
-#define PNG_WRITE_sRGB_SUPPORTED
-#define PNG_WRITE_tEXt_SUPPORTED
-#define PNG_WRITE_tIME_SUPPORTED
-#define PNG_WRITE_tRNS_SUPPORTED
-#define PNG_WRITE_zTXt_SUPPORTED
-#define PNG_bKGD_SUPPORTED
-#define PNG_cHRM_SUPPORTED
-#define PNG_eXIf_SUPPORTED
-#define PNG_gAMA_SUPPORTED
-#define PNG_hIST_SUPPORTED
-#define PNG_iCCP_SUPPORTED
-#define PNG_iTXt_SUPPORTED
-#define PNG_oFFs_SUPPORTED
-#define PNG_pCAL_SUPPORTED
-#define PNG_pHYs_SUPPORTED
-#define PNG_sBIT_SUPPORTED
-#define PNG_sCAL_SUPPORTED
-#define PNG_sPLT_SUPPORTED
-#define PNG_sRGB_SUPPORTED
-#define PNG_tEXt_SUPPORTED
-#define PNG_tIME_SUPPORTED
-#define PNG_tRNS_SUPPORTED
-#define PNG_zTXt_SUPPORTED
-/* end of options */
-/* settings */
-#define PNG_API_RULE 0
-#define PNG_DEFAULT_READ_MACROS 1
-#define PNG_GAMMA_THRESHOLD_FIXED 5000
-#define PNG_IDAT_READ_SIZE PNG_ZBUF_SIZE
-#define PNG_INFLATE_BUF_SIZE 1024
-#define PNG_LINKAGE_API extern
-#define PNG_LINKAGE_CALLBACK extern
-#define PNG_LINKAGE_DATA extern
-#define PNG_LINKAGE_FUNCTION extern
-#define PNG_MAX_GAMMA_8 11
-#define PNG_QUANTIZE_BLUE_BITS 5
-#define PNG_QUANTIZE_GREEN_BITS 5
-#define PNG_QUANTIZE_RED_BITS 5
-#define PNG_TEXT_Z_DEFAULT_COMPRESSION (-1)
-#define PNG_TEXT_Z_DEFAULT_STRATEGY 0
-#define PNG_USER_CHUNK_CACHE_MAX 1000
-#define PNG_USER_CHUNK_MALLOC_MAX 8000000
-#define PNG_USER_HEIGHT_MAX 1000000
-#define PNG_USER_WIDTH_MAX 1000000
-#define PNG_ZBUF_SIZE 8192
-#define PNG_ZLIB_VERNUM 0 /* unknown */
-#define PNG_Z_DEFAULT_COMPRESSION (-1)
-#define PNG_Z_DEFAULT_NOFILTER_STRATEGY 0
-#define PNG_Z_DEFAULT_STRATEGY 1
-#define PNG_sCAL_PRECISION 5
-#define PNG_sRGB_PROFILE_CHECKS 2
-/* end of settings */
-#endif /* PNGLCONF_H */
diff --git a/dep/libpng/libpng.vcxproj b/dep/libpng/libpng.vcxproj
deleted file mode 100644
index 219b94786..000000000
--- a/dep/libpng/libpng.vcxproj
+++ /dev/null
@@ -1,61 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <Import Project="..\msvc\vsprops\Configurations.props" />
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{9FD2ABCD-2DCD-4302-BE5C-DF0BA8431FA5}</ProjectGuid>
-  </PropertyGroup>
-  <ItemGroup>
-    <ClInclude Include="include\png.h" />
-    <ClInclude Include="include\pngconf.h" />
-    <ClInclude Include="include\pnglibconf.h" />
-    <ClInclude Include="src\pngdebug.h" />
-    <ClInclude Include="src\pnginfo.h" />
-    <ClInclude Include="src\pngpriv.h" />
-    <ClInclude Include="src\pngstruct.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\arm\arm_init.c">
-      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
-    </ClCompile>
-    <ClCompile Include="src\arm\filter_neon_intrinsics.c">
-      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
-    </ClCompile>
-    <ClCompile Include="src\arm\palette_neon_intrinsics.c">
-      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
-    </ClCompile>
-    <ClCompile Include="src\intel\filter_sse2_intrinsics.c">
-      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
-    </ClCompile>
-    <ClCompile Include="src\intel\intel_init.c">
-      <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
-    </ClCompile>
-    <ClCompile Include="src\png.c" />
-    <ClCompile Include="src\pngerror.c" />
-    <ClCompile Include="src\pngget.c" />
-    <ClCompile Include="src\pngmem.c" />
-    <ClCompile Include="src\pngpread.c" />
-    <ClCompile Include="src\pngread.c" />
-    <ClCompile Include="src\pngrio.c" />
-    <ClCompile Include="src\pngrtran.c" />
-    <ClCompile Include="src\pngrutil.c" />
-    <ClCompile Include="src\pngset.c" />
-    <ClCompile Include="src\pngtrans.c" />
-    <ClCompile Include="src\pngwio.c" />
-    <ClCompile Include="src\pngwrite.c" />
-    <ClCompile Include="src\pngwtran.c" />
-    <ClCompile Include="src\pngwutil.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="..\zlib\zlib.vcxproj">
-      <Project>{7ff9fdb9-d504-47db-a16a-b08071999620}</Project>
-    </ProjectReference>
-  </ItemGroup>
-  <Import Project="..\msvc\vsprops\StaticLibrary.props" />
-  <ItemDefinitionGroup>
-    <ClCompile>
-      <WarningLevel>TurnOffAllWarnings</WarningLevel>
-      <AdditionalIncludeDirectories>$(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)..\zlib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-  </ItemDefinitionGroup>
-  <Import Project="..\msvc\vsprops\Targets.props" />
-</Project>
\ No newline at end of file
diff --git a/dep/libpng/libpng.vcxproj.filters b/dep/libpng/libpng.vcxproj.filters
deleted file mode 100644
index 9b5b8ef17..000000000
--- a/dep/libpng/libpng.vcxproj.filters
+++ /dev/null
@@ -1,52 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClInclude Include="include\pngconf.h" />
-    <ClInclude Include="include\png.h" />
-    <ClInclude Include="src\pngdebug.h" />
-    <ClInclude Include="src\pnginfo.h" />
-    <ClInclude Include="src\pngpriv.h" />
-    <ClInclude Include="src\pngstruct.h" />
-    <ClInclude Include="include\pnglibconf.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\png.c" />
-    <ClCompile Include="src\pngerror.c" />
-    <ClCompile Include="src\pngget.c" />
-    <ClCompile Include="src\pngmem.c" />
-    <ClCompile Include="src\pngpread.c" />
-    <ClCompile Include="src\pngread.c" />
-    <ClCompile Include="src\pngrio.c" />
-    <ClCompile Include="src\pngrtran.c" />
-    <ClCompile Include="src\pngrutil.c" />
-    <ClCompile Include="src\pngset.c" />
-    <ClCompile Include="src\pngtrans.c" />
-    <ClCompile Include="src\pngwio.c" />
-    <ClCompile Include="src\pngwrite.c" />
-    <ClCompile Include="src\pngwtran.c" />
-    <ClCompile Include="src\pngwutil.c" />
-    <ClCompile Include="src\intel\intel_init.c">
-      <Filter>intel</Filter>
-    </ClCompile>
-    <ClCompile Include="src\intel\filter_sse2_intrinsics.c">
-      <Filter>intel</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\filter_neon_intrinsics.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\palette_neon_intrinsics.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-    <ClCompile Include="src\arm\arm_init.c">
-      <Filter>arm</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <Filter Include="arm">
-      <UniqueIdentifier>{9f24e95e-025d-4ed8-8c41-2fb1c7a36026}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="intel">
-      <UniqueIdentifier>{8316b9c1-8c00-4bc8-ace7-c9b864890f2d}</UniqueIdentifier>
-    </Filter>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/dep/libpng/src/arm/arm_init.c b/dep/libpng/src/arm/arm_init.c
deleted file mode 100644
index 84d05556f..000000000
--- a/dep/libpng/src/arm/arm_init.c
+++ /dev/null
@@ -1,139 +0,0 @@
-
-/* arm_init.c - NEON optimised filter functions
- *
- * Copyright (c) 2018-2022 Cosmin Truta
- * Copyright (c) 2014,2016 Glenn Randers-Pehrson
- * Written by Mans Rullgard, 2011.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* This module requires POSIX 1003.1 functions. */
-#define _POSIX_SOURCE 1
-
-#include "../pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-#if PNG_ARM_NEON_OPT > 0
-#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
-/* WARNING: it is strongly recommended that you do not build libpng with
- * run-time checks for CPU features if at all possible.  In the case of the ARM
- * NEON instructions there is no processor-specific way of detecting the
- * presence of the required support, therefore run-time detection is extremely
- * OS specific.
- *
- * You may set the macro PNG_ARM_NEON_FILE to the file name of file containing
- * a fragment of C source code which defines the png_have_neon function.  There
- * are a number of implementations in contrib/arm-neon, but the only one that
- * has partial support is contrib/arm-neon/linux.c - a generic Linux
- * implementation which reads /proc/cpufino.
- */
-#include <signal.h> /* for sig_atomic_t */
-
-#ifndef PNG_ARM_NEON_FILE
-#  if defined(__aarch64__) || defined(_M_ARM64)
-     /* ARM Neon is expected to be unconditionally available on ARM64. */
-#    error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on ARM64"
-#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
-     /* ARM Neon is expected to be available on the target CPU architecture. */
-#    error "PNG_ARM_NEON_CHECK_SUPPORTED must not be defined on this CPU arch"
-#  elif defined(__linux__)
-#    define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c"
-#  else
-#    error "No support for run-time ARM Neon checking; use compile-time options"
-#  endif
-#endif
-
-static int png_have_neon(png_structp png_ptr);
-#ifdef PNG_ARM_NEON_FILE
-#  include PNG_ARM_NEON_FILE
-#endif
-#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
-
-#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
-#  error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
-#endif
-
-void
-png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
-{
-   /* The switch statement is compiled in for ARM_NEON_API, the call to
-    * png_have_neon is compiled in for ARM_NEON_CHECK.  If both are defined
-    * the check is only performed if the API has not set the NEON option on
-    * or off explicitly.  In this case the check controls what happens.
-    *
-    * If the CHECK is not compiled in and the option is UNSET the behavior prior
-    * to 1.6.7 was to use the NEON code - this was a bug caused by having the
-    * wrong order of the 'ON' and 'default' cases.  UNSET now defaults to OFF,
-    * as documented in png.h
-    */
-   png_debug(1, "in png_init_filter_functions_neon");
-#ifdef PNG_ARM_NEON_API_SUPPORTED
-   switch ((pp->options >> PNG_ARM_NEON) & 3)
-   {
-      case PNG_OPTION_UNSET:
-         /* Allow the run-time check to execute if it has been enabled -
-          * thus both API and CHECK can be turned on.  If it isn't supported
-          * this case will fall through to the 'default' below, which just
-          * returns.
-          */
-#endif /* PNG_ARM_NEON_API_SUPPORTED */
-#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
-         {
-            static volatile sig_atomic_t no_neon = -1; /* not checked */
-
-            if (no_neon < 0)
-               no_neon = !png_have_neon(pp);
-
-            if (no_neon)
-               return;
-         }
-#ifdef PNG_ARM_NEON_API_SUPPORTED
-         break;
-#endif
-#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
-
-#ifdef PNG_ARM_NEON_API_SUPPORTED
-      default: /* OFF or INVALID */
-         return;
-
-      case PNG_OPTION_ON:
-         /* Option turned on */
-         break;
-   }
-#endif
-
-   /* IMPORTANT: any new external functions used here must be declared using
-    * PNG_INTERNAL_FUNCTION in ../pngpriv.h.  This is required so that the
-    * 'prefix' option to configure works:
-    *
-    *    ./configure --with-libpng-prefix=foobar_
-    *
-    * Verify you have got this right by running the above command, doing a build
-    * and examining pngprefix.h; it must contain a #define for every external
-    * function you add.  (Notice that this happens automatically for the
-    * initialization function.)
-    */
-   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
-
-   if (bpp == 3)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth3_neon;
-   }
-
-   else if (bpp == 4)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-          png_read_filter_row_paeth4_neon;
-   }
-}
-#endif /* PNG_ARM_NEON_OPT > 0 */
-#endif /* READ */
diff --git a/dep/libpng/src/arm/filter_neon.S b/dep/libpng/src/arm/filter_neon.S
deleted file mode 100644
index 2308aad13..000000000
--- a/dep/libpng/src/arm/filter_neon.S
+++ /dev/null
@@ -1,253 +0,0 @@
-
-/* filter_neon.S - NEON optimised filter functions
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2014,2017 Glenn Randers-Pehrson
- * Written by Mans Rullgard, 2011.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* This is required to get the symbol renames, which are #defines, and the
- * definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION.
- */
-#define PNG_VERSION_INFO_ONLY
-#include "../pngpriv.h"
-
-#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
-#endif
-
-#ifdef PNG_READ_SUPPORTED
-
-/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
- * ARM64).  The code in arm/filter_neon_intrinsics.c supports ARM64, however it
- * only works if -mfpu=neon is specified on the GCC command line.  See pngpriv.h
- * for the logic which sets PNG_USE_ARM_NEON_ASM:
- */
-#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
-
-#if PNG_ARM_NEON_OPT > 0
-
-#ifdef __ELF__
-#   define ELF
-#else
-#   define ELF @
-#endif
-
-        .arch armv7-a
-        .fpu  neon
-
-.macro  func    name, export=0
-    .macro endfunc
-ELF     .size   \name, . - \name
-        .endfunc
-        .purgem endfunc
-    .endm
-        .text
-
-        /* Explicitly specifying alignment here because some versions of
-         * GAS don't align code correctly.  This is harmless in correctly
-         * written versions of GAS.
-         */
-        .align 2
-
-    .if \export
-        .global \name
-    .endif
-ELF     .type   \name, STT_FUNC
-        .func   \name
-\name:
-.endm
-
-func    png_read_filter_row_sub4_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vadd.u8         d0,  d3,  d4
-        vadd.u8         d1,  d0,  d5
-        vadd.u8         d2,  d1,  d6
-        vadd.u8         d3,  d2,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r3,  r3,  #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_sub3_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        mov             r0,  r1
-        mov             r2,  #3
-        mov             r12, #12
-        vld1.8          {q11},    [r0], r12
-1:
-        vext.8          d5,  d22, d23, #3
-        vadd.u8         d0,  d3,  d22
-        vext.8          d6,  d22, d23, #6
-        vadd.u8         d1,  d0,  d5
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], r12
-        vst1.32         {d0[0]},  [r1,:32], r2
-        vadd.u8         d2,  d1,  d6
-        vst1.32         {d1[0]},  [r1], r2
-        vadd.u8         d3,  d2,  d7
-        vst1.32         {d2[0]},  [r1], r2
-        vst1.32         {d3[0]},  [r1], r2
-        subs            r3,  r3,  #12
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_up_neon, export=1
-        ldr             r3,  [r0, #4]           @ rowbytes
-1:
-        vld1.8          {q0}, [r1,:128]
-        vld1.8          {q1}, [r2,:128]!
-        vadd.u8         q0,  q0,  q1
-        vst1.8          {q0}, [r1,:128]!
-        subs            r3,  r3,  #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_avg4_neon, export=1
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
-        vhadd.u8        d0,  d3,  d16
-        vadd.u8         d0,  d0,  d4
-        vhadd.u8        d1,  d0,  d17
-        vadd.u8         d1,  d1,  d5
-        vhadd.u8        d2,  d1,  d18
-        vadd.u8         d2,  d2,  d6
-        vhadd.u8        d3,  d2,  d19
-        vadd.u8         d3,  d3,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r12, r12, #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_avg3_neon, export=1
-        push            {r4,lr}
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        mov             r0,  r1
-        mov             r4,  #3
-        mov             lr,  #12
-        vld1.8          {q11},    [r0], lr
-1:
-        vld1.8          {q10},    [r2], lr
-        vext.8          d5,  d22, d23, #3
-        vhadd.u8        d0,  d3,  d20
-        vext.8          d17, d20, d21, #3
-        vadd.u8         d0,  d0,  d22
-        vext.8          d6,  d22, d23, #6
-        vhadd.u8        d1,  d0,  d17
-        vext.8          d18, d20, d21, #6
-        vadd.u8         d1,  d1,  d5
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], lr
-        vst1.32         {d0[0]},  [r1,:32], r4
-        vhadd.u8        d2,  d1,  d18
-        vst1.32         {d1[0]},  [r1], r4
-        vext.8          d19, d21, d21, #1
-        vadd.u8         d2,  d2,  d6
-        vhadd.u8        d3,  d2,  d19
-        vst1.32         {d2[0]},  [r1], r4
-        vadd.u8         d3,  d3,  d7
-        vst1.32         {d3[0]},  [r1], r4
-        subs            r12, r12, #12
-        bgt             1b
-
-        pop             {r4,pc}
-endfunc
-
-.macro  paeth           rx,  ra,  rb,  rc
-        vaddl.u8        q12, \ra, \rb           @ a + b
-        vaddl.u8        q15, \rc, \rc           @ 2*c
-        vabdl.u8        q13, \rb, \rc           @ pa
-        vabdl.u8        q14, \ra, \rc           @ pb
-        vabd.u16        q15, q12, q15           @ pc
-        vcle.u16        q12, q13, q14           @ pa <= pb
-        vcle.u16        q13, q13, q15           @ pa <= pc
-        vcle.u16        q14, q14, q15           @ pb <= pc
-        vand            q12, q12, q13           @ pa <= pb && pa <= pc
-        vmovn.u16       d28, q14
-        vmovn.u16       \rx, q12
-        vbsl            d28, \rb, \rc
-        vbsl            \rx, \ra, d28
-.endm
-
-func    png_read_filter_row_paeth4_neon, export=1
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        vmov.i8         d20, #0
-1:
-        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
-        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
-        paeth           d0,  d3,  d16, d20
-        vadd.u8         d0,  d0,  d4
-        paeth           d1,  d0,  d17, d16
-        vadd.u8         d1,  d1,  d5
-        paeth           d2,  d1,  d18, d17
-        vadd.u8         d2,  d2,  d6
-        paeth           d3,  d2,  d19, d18
-        vmov            d20, d19
-        vadd.u8         d3,  d3,  d7
-        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
-        subs            r12, r12, #16
-        bgt             1b
-
-        bx              lr
-endfunc
-
-func    png_read_filter_row_paeth3_neon, export=1
-        push            {r4,lr}
-        ldr             r12, [r0, #4]           @ rowbytes
-        vmov.i8         d3,  #0
-        vmov.i8         d4,  #0
-        mov             r0,  r1
-        mov             r4,  #3
-        mov             lr,  #12
-        vld1.8          {q11},    [r0], lr
-1:
-        vld1.8          {q10},    [r2], lr
-        paeth           d0,  d3,  d20, d4
-        vext.8          d5,  d22, d23, #3
-        vadd.u8         d0,  d0,  d22
-        vext.8          d17, d20, d21, #3
-        paeth           d1,  d0,  d17, d20
-        vst1.32         {d0[0]},  [r1,:32], r4
-        vext.8          d6,  d22, d23, #6
-        vadd.u8         d1,  d1,  d5
-        vext.8          d18, d20, d21, #6
-        paeth           d2,  d1,  d18, d17
-        vext.8          d7,  d23, d23, #1
-        vld1.8          {q11},    [r0], lr
-        vst1.32         {d1[0]},  [r1], r4
-        vadd.u8         d2,  d2,  d6
-        vext.8          d19, d21, d21, #1
-        paeth           d3,  d2,  d19, d18
-        vst1.32         {d2[0]},  [r1], r4
-        vmov            d4,  d19
-        vadd.u8         d3,  d3,  d7
-        vst1.32         {d3[0]},  [r1], r4
-        subs            r12, r12, #12
-        bgt             1b
-
-        pop             {r4,pc}
-endfunc
-#endif /* PNG_ARM_NEON_OPT > 0 */
-#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
-#endif /* READ */
diff --git a/dep/libpng/src/arm/filter_neon_intrinsics.c b/dep/libpng/src/arm/filter_neon_intrinsics.c
deleted file mode 100644
index 4466d48b2..000000000
--- a/dep/libpng/src/arm/filter_neon_intrinsics.c
+++ /dev/null
@@ -1,402 +0,0 @@
-
-/* filter_neon_intrinsics.c - NEON optimised filter functions
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2014,2016 Glenn Randers-Pehrson
- * Written by James Yu <james.yu at linaro.org>, October 2013.
- * Based on filter_neon.S, written by Mans Rullgard, 2011.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "../pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-/* This code requires -mfpu=neon on the command line: */
-#if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
-
-#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
-#  include <arm64_neon.h>
-#else
-#  include <arm_neon.h>
-#endif
-
-/* libpng row pointers are not necessarily aligned to any particular boundary,
- * however this code will only work with appropriate alignment.  arm/arm_init.c
- * checks for this (and will not compile unless it is done). This code uses
- * variants of png_aligncast to avoid compiler warnings.
- */
-#define png_ptr(type,pointer) png_aligncast(type *,pointer)
-#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
-
-/* The following relies on a variable 'temp_pointer' being declared with type
- * 'type'.  This is written this way just to hide the GCC strict aliasing
- * warning; note that the code is safe because there never is an alias between
- * the input and output pointers.
- *
- * When compiling with MSVC ARM64, the png_ldr macro can't be passed directly
- * to vst4_lane_u32, because of an internal compiler error inside MSVC.
- * To avoid this compiler bug, we use a temporary variable (vdest_val) to store
- * the result of png_ldr.
- */
-#define png_ldr(type,pointer)\
-   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
-
-#if PNG_ARM_NEON_OPT > 0
-
-void
-png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-   png_const_bytep pp = prev_row;
-
-   png_debug(1, "in png_read_filter_row_up_neon");
-
-   for (; rp < rp_stop; rp += 16, pp += 16)
-   {
-      uint8x16_t qrp, qpp;
-
-      qrp = vld1q_u8(rp);
-      qpp = vld1q_u8(pp);
-      qrp = vaddq_u8(qrp, qpp);
-      vst1q_u8(rp, qrp);
-   }
-}
-
-void
-png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-
-   uint8x16_t vtmp = vld1q_u8(rp);
-   uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
-   uint8x8x2_t vrp = *vrpt;
-
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   png_debug(1, "in png_read_filter_row_sub3_neon");
-
-   for (; rp < rp_stop;)
-   {
-      uint8x8_t vtmp1, vtmp2;
-      uint32x2_t *temp_pointer;
-
-      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
-      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
-      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
-
-      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-      vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
-      vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
-
-      vtmp = vld1q_u8(rp + 12);
-      vrpt = png_ptr(uint8x8x2_t, &vtmp);
-      vrp = *vrpt;
-
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-      rp += 3;
-   }
-
-   PNG_UNUSED(prev_row)
-}
-
-void
-png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   png_debug(1, "in png_read_filter_row_sub4_neon");
-
-   for (; rp < rp_stop; rp += 16)
-   {
-      uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
-      uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
-      uint8x8x4_t vrp = *vrpt;
-      uint32x2x4_t *temp_pointer;
-      uint32x2x4_t vdest_val;
-
-      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
-      vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
-      vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
-      vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
-
-      vdest_val = png_ldr(uint32x2x4_t, &vdest);
-      vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-   }
-
-   PNG_UNUSED(prev_row)
-}
-
-void
-png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-
-   uint8x16_t vtmp;
-   uint8x8x2_t *vrpt;
-   uint8x8x2_t vrp;
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   vtmp = vld1q_u8(rp);
-   vrpt = png_ptr(uint8x8x2_t,&vtmp);
-   vrp = *vrpt;
-
-   png_debug(1, "in png_read_filter_row_avg3_neon");
-
-   for (; rp < rp_stop; pp += 12)
-   {
-      uint8x8_t vtmp1, vtmp2, vtmp3;
-
-      uint8x8x2_t *vppt;
-      uint8x8x2_t vpp;
-
-      uint32x2_t *temp_pointer;
-
-      vtmp = vld1q_u8(pp);
-      vppt = png_ptr(uint8x8x2_t,&vtmp);
-      vpp = *vppt;
-
-      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
-      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-
-      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
-      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6);
-      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
-      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
-
-      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6);
-      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-
-      vtmp = vld1q_u8(rp + 12);
-      vrpt = png_ptr(uint8x8x2_t,&vtmp);
-      vrp = *vrpt;
-
-      vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2);
-      vdest.val[2] = vadd_u8(vdest.val[2], vtmp3);
-
-      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
-
-      vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2);
-      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
-
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-      rp += 3;
-   }
-}
-
-void
-png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-   png_const_bytep pp = prev_row;
-
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   png_debug(1, "in png_read_filter_row_avg4_neon");
-
-   for (; rp < rp_stop; rp += 16, pp += 16)
-   {
-      uint32x2x4_t vtmp;
-      uint8x8x4_t *vrpt, *vppt;
-      uint8x8x4_t vrp, vpp;
-      uint32x2x4_t *temp_pointer;
-      uint32x2x4_t vdest_val;
-
-      vtmp = vld4_u32(png_ptr(uint32_t,rp));
-      vrpt = png_ptr(uint8x8x4_t,&vtmp);
-      vrp = *vrpt;
-      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
-      vppt = png_ptr(uint8x8x4_t,&vtmp);
-      vpp = *vppt;
-
-      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
-      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-      vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
-      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
-      vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]);
-      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
-      vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]);
-      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
-
-      vdest_val = png_ldr(uint32x2x4_t, &vdest);
-      vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-   }
-}
-
-static uint8x8_t
-paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c)
-{
-   uint8x8_t d, e;
-   uint16x8_t p1, pa, pb, pc;
-
-   p1 = vaddl_u8(a, b); /* a + b */
-   pc = vaddl_u8(c, c); /* c * 2 */
-   pa = vabdl_u8(b, c); /* pa */
-   pb = vabdl_u8(a, c); /* pb */
-   pc = vabdq_u16(p1, pc); /* pc */
-
-   p1 = vcleq_u16(pa, pb); /* pa <= pb */
-   pa = vcleq_u16(pa, pc); /* pa <= pc */
-   pb = vcleq_u16(pb, pc); /* pb <= pc */
-
-   p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
-
-   d = vmovn_u16(pb);
-   e = vmovn_u16(p1);
-
-   d = vbsl_u8(d, b, c);
-   e = vbsl_u8(e, a, d);
-
-   return e;
-}
-
-void
-png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-
-   uint8x16_t vtmp;
-   uint8x8x2_t *vrpt;
-   uint8x8x2_t vrp;
-   uint8x8_t vlast = vdup_n_u8(0);
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   vtmp = vld1q_u8(rp);
-   vrpt = png_ptr(uint8x8x2_t,&vtmp);
-   vrp = *vrpt;
-
-   png_debug(1, "in png_read_filter_row_paeth3_neon");
-
-   for (; rp < rp_stop; pp += 12)
-   {
-      uint8x8x2_t *vppt;
-      uint8x8x2_t vpp;
-      uint8x8_t vtmp1, vtmp2, vtmp3;
-      uint32x2_t *temp_pointer;
-
-      vtmp = vld1q_u8(pp);
-      vppt = png_ptr(uint8x8x2_t,&vtmp);
-      vpp = *vppt;
-
-      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
-      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-
-      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
-      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
-      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
-
-      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6);
-      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6);
-      vdest.val[2] = paeth(vdest.val[1], vtmp3, vtmp2);
-      vdest.val[2] = vadd_u8(vdest.val[2], vtmp1);
-
-      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
-
-      vtmp = vld1q_u8(rp + 12);
-      vrpt = png_ptr(uint8x8x2_t,&vtmp);
-      vrp = *vrpt;
-
-      vdest.val[3] = paeth(vdest.val[2], vtmp2, vtmp3);
-      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
-
-      vlast = vtmp2;
-
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-      rp += 3;
-      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-      rp += 3;
-   }
-}
-
-void
-png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp = row;
-   png_bytep rp_stop = row + row_info->rowbytes;
-   png_const_bytep pp = prev_row;
-
-   uint8x8_t vlast = vdup_n_u8(0);
-   uint8x8x4_t vdest;
-   vdest.val[3] = vdup_n_u8(0);
-
-   png_debug(1, "in png_read_filter_row_paeth4_neon");
-
-   for (; rp < rp_stop; rp += 16, pp += 16)
-   {
-      uint32x2x4_t vtmp;
-      uint8x8x4_t *vrpt, *vppt;
-      uint8x8x4_t vrp, vpp;
-      uint32x2x4_t *temp_pointer;
-      uint32x2x4_t vdest_val;
-
-      vtmp = vld4_u32(png_ptr(uint32_t,rp));
-      vrpt = png_ptr(uint8x8x4_t,&vtmp);
-      vrp = *vrpt;
-      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
-      vppt = png_ptr(uint8x8x4_t,&vtmp);
-      vpp = *vppt;
-
-      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
-      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-      vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]);
-      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
-      vdest.val[2] = paeth(vdest.val[1], vpp.val[2], vpp.val[1]);
-      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
-      vdest.val[3] = paeth(vdest.val[2], vpp.val[3], vpp.val[2]);
-      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
-
-      vlast = vpp.val[3];
-
-      vdest_val = png_ldr(uint32x2x4_t, &vdest);
-      vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-   }
-}
-
-#endif /* PNG_ARM_NEON_OPT > 0 */
-#endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */
-#endif /* READ */
diff --git a/dep/libpng/src/arm/palette_neon_intrinsics.c b/dep/libpng/src/arm/palette_neon_intrinsics.c
deleted file mode 100644
index 92c7d6f9f..000000000
--- a/dep/libpng/src/arm/palette_neon_intrinsics.c
+++ /dev/null
@@ -1,151 +0,0 @@
-
-/* palette_neon_intrinsics.c - NEON optimised palette expansion functions
- *
- * Copyright (c) 2018-2019 Cosmin Truta
- * Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
- * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "../pngpriv.h"
-
-#if PNG_ARM_NEON_IMPLEMENTATION == 1
-
-#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
-#  include <arm64_neon.h>
-#else
-#  include <arm_neon.h>
-#endif
-
-/* Build an RGBA8 palette from the separate RGB and alpha palettes. */
-void
-png_riffle_palette_neon(png_structrp png_ptr)
-{
-   png_const_colorp palette = png_ptr->palette;
-   png_bytep riffled_palette = png_ptr->riffled_palette;
-   png_const_bytep trans_alpha = png_ptr->trans_alpha;
-   int num_trans = png_ptr->num_trans;
-   int i;
-
-   /* Initially black, opaque. */
-   uint8x16x4_t w = {{
-      vdupq_n_u8(0x00),
-      vdupq_n_u8(0x00),
-      vdupq_n_u8(0x00),
-      vdupq_n_u8(0xff),
-   }};
-
-   png_debug(1, "in png_riffle_palette_neon");
-
-   /* First, riffle the RGB colours into an RGBA8 palette.
-    * The alpha component is set to opaque for now.
-    */
-   for (i = 0; i < 256; i += 16)
-   {
-      uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
-      w.val[0] = v.val[0];
-      w.val[1] = v.val[1];
-      w.val[2] = v.val[2];
-      vst4q_u8(riffled_palette + (i << 2), w);
-   }
-
-   /* Fix up the missing transparency values. */
-   for (i = 0; i < num_trans; i++)
-      riffled_palette[(i << 2) + 3] = trans_alpha[i];
-}
-
-/* Expands a palettized row into RGBA8. */
-int
-png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info,
-    png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
-{
-   png_uint_32 row_width = row_info->width;
-   const png_uint_32 *riffled_palette =
-      (const png_uint_32 *)png_ptr->riffled_palette;
-   const png_uint_32 pixels_per_chunk = 4;
-   png_uint_32 i;
-
-   png_debug(1, "in png_do_expand_palette_rgba8_neon");
-
-   PNG_UNUSED(row)
-   if (row_width < pixels_per_chunk)
-      return 0;
-
-   /* This function originally gets the last byte of the output row.
-    * The NEON part writes forward from a given position, so we have
-    * to seek this back by 4 pixels x 4 bytes.
-    */
-   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
-
-   for (i = 0; i < row_width; i += pixels_per_chunk)
-   {
-      uint32x4_t cur;
-      png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
-      cur = vld1q_dup_u32 (riffled_palette + *(sp - 3));
-      cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1);
-      cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2);
-      cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
-      vst1q_u32((void *)dp, cur);
-   }
-   if (i != row_width)
-   {
-      /* Remove the amount that wasn't processed. */
-      i -= pixels_per_chunk;
-   }
-
-   /* Decrement output pointers. */
-   *ssp = *ssp - i;
-   *ddp = *ddp - (i << 2);
-   return i;
-}
-
-/* Expands a palettized row into RGB8. */
-int
-png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info,
-    png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
-{
-   png_uint_32 row_width = row_info->width;
-   png_const_bytep palette = (png_const_bytep)png_ptr->palette;
-   const png_uint_32 pixels_per_chunk = 8;
-   png_uint_32 i;
-
-   png_debug(1, "in png_do_expand_palette_rgb8_neon");
-
-   PNG_UNUSED(row)
-   if (row_width <= pixels_per_chunk)
-      return 0;
-
-   /* Seeking this back by 8 pixels x 3 bytes. */
-   *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
-
-   for (i = 0; i < row_width; i += pixels_per_chunk)
-   {
-      uint8x8x3_t cur;
-      png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
-      cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7)));
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6);
-      cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7);
-      vst3_u8((void *)dp, cur);
-   }
-
-   if (i != row_width)
-   {
-      /* Remove the amount that wasn't processed. */
-      i -= pixels_per_chunk;
-   }
-
-   /* Decrement output pointers. */
-   *ssp = *ssp - i;
-   *ddp = *ddp - ((i << 1) + i);
-   return i;
-}
-
-#endif /* PNG_ARM_NEON_IMPLEMENTATION */
diff --git a/dep/libpng/src/intel/filter_sse2_intrinsics.c b/dep/libpng/src/intel/filter_sse2_intrinsics.c
deleted file mode 100644
index d3c0fe9e2..000000000
--- a/dep/libpng/src/intel/filter_sse2_intrinsics.c
+++ /dev/null
@@ -1,391 +0,0 @@
-
-/* filter_sse2_intrinsics.c - SSE2 optimized filter functions
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2016-2017 Glenn Randers-Pehrson
- * Written by Mike Klein and Matt Sarett
- * Derived from arm/filter_neon_intrinsics.c
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "../pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-#if PNG_INTEL_SSE_IMPLEMENTATION > 0
-
-#include <immintrin.h>
-
-/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
- * They're positioned like this:
- *    prev:  c b
- *    row:   a d
- * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
- * whichever of a, b, or c is closest to p=a+b-c.
- */
-
-static __m128i load4(const void* p) {
-   int tmp;
-   memcpy(&tmp, p, sizeof(tmp));
-   return _mm_cvtsi32_si128(tmp);
-}
-
-static void store4(void* p, __m128i v) {
-   int tmp = _mm_cvtsi128_si32(v);
-   memcpy(p, &tmp, sizeof(int));
-}
-
-static __m128i load3(const void* p) {
-   png_uint_32 tmp = 0;
-   memcpy(&tmp, p, 3);
-   return _mm_cvtsi32_si128(tmp);
-}
-
-static void store3(void* p, __m128i v) {
-   int tmp = _mm_cvtsi128_si32(v);
-   memcpy(p, &tmp, 3);
-}
-
-void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* The Sub filter predicts each pixel as the previous pixel, a.
-    * There is no pixel to the left of the first pixel.  It's encoded directly.
-    * That works with our main loop if we just say that left pixel was zero.
-    */
-   size_t rb;
-
-   __m128i a, d = _mm_setzero_si128();
-
-   png_debug(1, "in png_read_filter_row_sub3_sse2");
-
-   rb = row_info->rowbytes;
-   while (rb >= 4) {
-      a = d; d = load4(row);
-      d = _mm_add_epi8(d, a);
-      store3(row, d);
-
-      row += 3;
-      rb  -= 3;
-   }
-   if (rb > 0) {
-      a = d; d = load3(row);
-      d = _mm_add_epi8(d, a);
-      store3(row, d);
-
-      row += 3;
-      rb  -= 3;
-   }
-   PNG_UNUSED(prev)
-}
-
-void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* The Sub filter predicts each pixel as the previous pixel, a.
-    * There is no pixel to the left of the first pixel.  It's encoded directly.
-    * That works with our main loop if we just say that left pixel was zero.
-    */
-   size_t rb;
-
-   __m128i a, d = _mm_setzero_si128();
-
-   png_debug(1, "in png_read_filter_row_sub4_sse2");
-
-   rb = row_info->rowbytes+4;
-   while (rb > 4) {
-      a = d; d = load4(row);
-      d = _mm_add_epi8(d, a);
-      store4(row, d);
-
-      row += 4;
-      rb  -= 4;
-   }
-   PNG_UNUSED(prev)
-}
-
-void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* The Avg filter predicts each pixel as the (truncated) average of a and b.
-    * There's no pixel to the left of the first pixel.  Luckily, it's
-    * predicted to be half of the pixel above it.  So again, this works
-    * perfectly with our loop if we make sure a starts at zero.
-    */
-
-   size_t rb;
-
-   const __m128i zero = _mm_setzero_si128();
-
-   __m128i    b;
-   __m128i a, d = zero;
-
-   png_debug(1, "in png_read_filter_row_avg3_sse2");
-   rb = row_info->rowbytes;
-   while (rb >= 4) {
-      __m128i avg;
-             b = load4(prev);
-      a = d; d = load4(row );
-
-      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-      avg = _mm_avg_epu8(a,b);
-      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
-                                            _mm_set1_epi8(1)));
-      d = _mm_add_epi8(d, avg);
-      store3(row, d);
-
-      prev += 3;
-      row  += 3;
-      rb   -= 3;
-   }
-   if (rb > 0) {
-      __m128i avg;
-             b = load3(prev);
-      a = d; d = load3(row );
-
-      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-      avg = _mm_avg_epu8(a,b);
-      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
-                                            _mm_set1_epi8(1)));
-
-      d = _mm_add_epi8(d, avg);
-      store3(row, d);
-
-      prev += 3;
-      row  += 3;
-      rb   -= 3;
-   }
-}
-
-void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* The Avg filter predicts each pixel as the (truncated) average of a and b.
-    * There's no pixel to the left of the first pixel.  Luckily, it's
-    * predicted to be half of the pixel above it.  So again, this works
-    * perfectly with our loop if we make sure a starts at zero.
-    */
-   size_t rb;
-   const __m128i zero = _mm_setzero_si128();
-   __m128i    b;
-   __m128i a, d = zero;
-
-   png_debug(1, "in png_read_filter_row_avg4_sse2");
-
-   rb = row_info->rowbytes+4;
-   while (rb > 4) {
-      __m128i avg;
-             b = load4(prev);
-      a = d; d = load4(row );
-
-      /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-      avg = _mm_avg_epu8(a,b);
-      /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-      avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
-                                            _mm_set1_epi8(1)));
-
-      d = _mm_add_epi8(d, avg);
-      store4(row, d);
-
-      prev += 4;
-      row  += 4;
-      rb   -= 4;
-   }
-}
-
-/* Returns |x| for 16-bit lanes. */
-static __m128i abs_i16(__m128i x) {
-#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
-   return _mm_abs_epi16(x);
-#else
-   /* Read this all as, return x<0 ? -x : x.
-   * To negate two's complement, you flip all the bits then add 1.
-    */
-   __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
-
-   /* Flip negative lanes. */
-   x = _mm_xor_si128(x, is_negative);
-
-   /* +1 to negative lanes, else +0. */
-   x = _mm_sub_epi16(x, is_negative);
-   return x;
-#endif
-}
-
-/* Bytewise c ? t : e. */
-static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
-#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
-   return _mm_blendv_epi8(e,t,c);
-#else
-   return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
-#endif
-}
-
-void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
-    * and two pixels from the previous row, b and c:
-    *   prev: c b
-    *   row:  a d
-    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
-    * p=a+b-c.
-    *
-    * The first pixel has no left context, and so uses an Up filter, p = b.
-    * This works naturally with our main loop's p = a+b-c if we force a and c
-    * to zero.
-    * Here we zero b and d, which become c and a respectively at the start of
-    * the loop.
-    */
-   size_t rb;
-   const __m128i zero = _mm_setzero_si128();
-   __m128i c, b = zero,
-           a, d = zero;
-
-   png_debug(1, "in png_read_filter_row_paeth3_sse2");
-
-   rb = row_info->rowbytes;
-   while (rb >= 4) {
-      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-       * intermediates.
-       */
-      __m128i pa,pb,pc,smallest,nearest;
-      c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
-      a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
-
-      /* (p-a) == (a+b-c - a) == (b-c) */
-
-      pa = _mm_sub_epi16(b,c);
-
-      /* (p-b) == (a+b-c - b) == (a-c) */
-      pb = _mm_sub_epi16(a,c);
-
-      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-      pc = _mm_add_epi16(pa,pb);
-
-      pa = abs_i16(pa);  /* |p-a| */
-      pb = abs_i16(pb);  /* |p-b| */
-      pc = abs_i16(pc);  /* |p-c| */
-
-      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-      /* Paeth breaks ties favoring a over b over c. */
-      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                 if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
-                                                             c));
-
-      /* Note `_epi8`: we need addition to wrap modulo 255. */
-      d = _mm_add_epi8(d, nearest);
-      store3(row, _mm_packus_epi16(d,d));
-
-      prev += 3;
-      row  += 3;
-      rb   -= 3;
-   }
-   if (rb > 0) {
-      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-       * intermediates.
-       */
-      __m128i pa,pb,pc,smallest,nearest;
-      c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
-      a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
-
-      /* (p-a) == (a+b-c - a) == (b-c) */
-      pa = _mm_sub_epi16(b,c);
-
-      /* (p-b) == (a+b-c - b) == (a-c) */
-      pb = _mm_sub_epi16(a,c);
-
-      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-      pc = _mm_add_epi16(pa,pb);
-
-      pa = abs_i16(pa);  /* |p-a| */
-      pb = abs_i16(pb);  /* |p-b| */
-      pc = abs_i16(pc);  /* |p-c| */
-
-      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-      /* Paeth breaks ties favoring a over b over c. */
-      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                         if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
-                                                                     c));
-
-      /* Note `_epi8`: we need addition to wrap modulo 255. */
-      d = _mm_add_epi8(d, nearest);
-      store3(row, _mm_packus_epi16(d,d));
-
-      prev += 3;
-      row  += 3;
-      rb   -= 3;
-   }
-}
-
-void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev)
-{
-   /* Paeth tries to predict pixel d using the pixel to the left of it, a,
-    * and two pixels from the previous row, b and c:
-    *   prev: c b
-    *   row:  a d
-    * The Paeth function predicts d to be whichever of a, b, or c is nearest to
-    * p=a+b-c.
-    *
-    * The first pixel has no left context, and so uses an Up filter, p = b.
-    * This works naturally with our main loop's p = a+b-c if we force a and c
-    * to zero.
-    * Here we zero b and d, which become c and a respectively at the start of
-    * the loop.
-    */
-   size_t rb;
-   const __m128i zero = _mm_setzero_si128();
-   __m128i pa,pb,pc,smallest,nearest;
-   __m128i c, b = zero,
-           a, d = zero;
-
-   png_debug(1, "in png_read_filter_row_paeth4_sse2");
-
-   rb = row_info->rowbytes+4;
-   while (rb > 4) {
-      /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-       * intermediates.
-       */
-      c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
-      a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
-
-      /* (p-a) == (a+b-c - a) == (b-c) */
-      pa = _mm_sub_epi16(b,c);
-
-      /* (p-b) == (a+b-c - b) == (a-c) */
-      pb = _mm_sub_epi16(a,c);
-
-      /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-      pc = _mm_add_epi16(pa,pb);
-
-      pa = abs_i16(pa);  /* |p-a| */
-      pb = abs_i16(pb);  /* |p-b| */
-      pc = abs_i16(pc);  /* |p-c| */
-
-      smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-      /* Paeth breaks ties favoring a over b over c. */
-      nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                         if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
-                                                                     c));
-
-      /* Note `_epi8`: we need addition to wrap modulo 255. */
-      d = _mm_add_epi8(d, nearest);
-      store4(row, _mm_packus_epi16(d,d));
-
-      prev += 4;
-      row  += 4;
-      rb   -= 4;
-   }
-}
-
-#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
-#endif /* READ */
diff --git a/dep/libpng/src/intel/intel_init.c b/dep/libpng/src/intel/intel_init.c
deleted file mode 100644
index 2f8168b7c..000000000
--- a/dep/libpng/src/intel/intel_init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-
-/* intel_init.c - SSE2 optimized filter functions
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2016-2017 Glenn Randers-Pehrson
- * Written by Mike Klein and Matt Sarett, Google, Inc.
- * Derived from arm/arm_init.c
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "../pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-#if PNG_INTEL_SSE_IMPLEMENTATION > 0
-
-void
-png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
-{
-   /* The techniques used to implement each of these filters in SSE operate on
-    * one pixel at a time.
-    * So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
-    * They can scale up to 6 and 8 bpp images and down to 2 bpp images,
-    * but they'd not likely have any benefit for 1bpp images.
-    * Most of these can be implemented using only MMX and 64-bit registers,
-    * but they end up a bit slower than using the equally-ubiquitous SSE2.
-   */
-   png_debug(1, "in png_init_filter_functions_sse2");
-   if (bpp == 3)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth3_sse2;
-   }
-   else if (bpp == 4)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-          png_read_filter_row_paeth4_sse2;
-   }
-
-   /* No need optimize PNG_FILTER_VALUE_UP.  The compiler should
-    * autovectorize.
-    */
-}
-
-#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
-#endif /* PNG_READ_SUPPORTED */
diff --git a/dep/libpng/src/png.c b/dep/libpng/src/png.c
deleted file mode 100644
index 9ed315700..000000000
--- a/dep/libpng/src/png.c
+++ /dev/null
@@ -1,4559 +0,0 @@
-
-/* png.c - location for general purpose libpng functions
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-/* Generate a compiler error if there is an old png.h in the search path. */
-typedef png_libpng_version_1_6_43 Your_png_h_is_not_version_1_6_43;
-
-/* Tells libpng that we have already handled the first "num_bytes" bytes
- * of the PNG file signature.  If the PNG data is embedded into another
- * stream we can set num_bytes = 8 so that libpng will not attempt to read
- * or write any of the magic bytes before it starts on the IHDR.
- */
-
-#ifdef PNG_READ_SUPPORTED
-void PNGAPI
-png_set_sig_bytes(png_structrp png_ptr, int num_bytes)
-{
-   unsigned int nb = (unsigned int)num_bytes;
-
-   png_debug(1, "in png_set_sig_bytes");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (num_bytes < 0)
-      nb = 0;
-
-   if (nb > 8)
-      png_error(png_ptr, "Too many bytes for PNG signature");
-
-   png_ptr->sig_bytes = (png_byte)nb;
-}
-
-/* Checks whether the supplied bytes match the PNG signature.  We allow
- * checking less than the full 8-byte signature so that those apps that
- * already read the first few bytes of a file to determine the file type
- * can simply check the remaining bytes for extra assurance.  Returns
- * an integer less than, equal to, or greater than zero if sig is found,
- * respectively, to be less than, to match, or be greater than the correct
- * PNG signature (this is the same behavior as strcmp, memcmp, etc).
- */
-int PNGAPI
-png_sig_cmp(png_const_bytep sig, size_t start, size_t num_to_check)
-{
-   static const png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-
-   if (num_to_check > 8)
-      num_to_check = 8;
-
-   else if (num_to_check < 1)
-      return -1;
-
-   if (start > 7)
-      return -1;
-
-   if (start + num_to_check > 8)
-      num_to_check = 8 - start;
-
-   return memcmp(&sig[start], &png_signature[start], num_to_check);
-}
-
-#endif /* READ */
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-/* Function to allocate memory for zlib */
-PNG_FUNCTION(voidpf /* PRIVATE */,
-png_zalloc,(voidpf png_ptr, uInt items, uInt size),PNG_ALLOCATED)
-{
-   png_alloc_size_t num_bytes = size;
-
-   if (png_ptr == NULL)
-      return NULL;
-
-   if (items >= (~(png_alloc_size_t)0)/size)
-   {
-      png_warning (png_voidcast(png_structrp, png_ptr),
-          "Potential overflow in png_zalloc()");
-      return NULL;
-   }
-
-   num_bytes *= items;
-   return png_malloc_warn(png_voidcast(png_structrp, png_ptr), num_bytes);
-}
-
-/* Function to free memory for zlib */
-void /* PRIVATE */
-png_zfree(voidpf png_ptr, voidpf ptr)
-{
-   png_free(png_voidcast(png_const_structrp,png_ptr), ptr);
-}
-
-/* Reset the CRC variable to 32 bits of 1's.  Care must be taken
- * in case CRC is > 32 bits to leave the top bits 0.
- */
-void /* PRIVATE */
-png_reset_crc(png_structrp png_ptr)
-{
-   /* The cast is safe because the crc is a 32-bit value. */
-   png_ptr->crc = (png_uint_32)crc32(0, Z_NULL, 0);
-}
-
-/* Calculate the CRC over a section of data.  We can only pass as
- * much data to this routine as the largest single buffer size.  We
- * also check that this data will actually be used before going to the
- * trouble of calculating it.
- */
-void /* PRIVATE */
-png_calculate_crc(png_structrp png_ptr, png_const_bytep ptr, size_t length)
-{
-   int need_crc = 1;
-
-   if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0)
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
-          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
-         need_crc = 0;
-   }
-
-   else /* critical */
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0)
-         need_crc = 0;
-   }
-
-   /* 'uLong' is defined in zlib.h as unsigned long; this means that on some
-    * systems it is a 64-bit value.  crc32, however, returns 32 bits so the
-    * following cast is safe.  'uInt' may be no more than 16 bits, so it is
-    * necessary to perform a loop here.
-    */
-   if (need_crc != 0 && length > 0)
-   {
-      uLong crc = png_ptr->crc; /* Should never issue a warning */
-
-      do
-      {
-         uInt safe_length = (uInt)length;
-#ifndef __COVERITY__
-         if (safe_length == 0)
-            safe_length = (uInt)-1; /* evil, but safe */
-#endif
-
-         crc = crc32(crc, ptr, safe_length);
-
-         /* The following should never issue compiler warnings; if they do the
-          * target system has characteristics that will probably violate other
-          * assumptions within the libpng code.
-          */
-         ptr += safe_length;
-         length -= safe_length;
-      }
-      while (length > 0);
-
-      /* And the following is always safe because the crc is only 32 bits. */
-      png_ptr->crc = (png_uint_32)crc;
-   }
-}
-
-/* Check a user supplied version number, called from both read and write
- * functions that create a png_struct.
- */
-int
-png_user_version_check(png_structrp png_ptr, png_const_charp user_png_ver)
-{
-   /* Libpng versions 1.0.0 and later are binary compatible if the version
-    * string matches through the second '.'; we must recompile any
-    * applications that use any older library version.
-    */
-
-   if (user_png_ver != NULL)
-   {
-      int i = -1;
-      int found_dots = 0;
-
-      do
-      {
-         i++;
-         if (user_png_ver[i] != PNG_LIBPNG_VER_STRING[i])
-            png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
-         if (user_png_ver[i] == '.')
-            found_dots++;
-      } while (found_dots < 2 && user_png_ver[i] != 0 &&
-            PNG_LIBPNG_VER_STRING[i] != 0);
-   }
-
-   else
-      png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
-
-   if ((png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH) != 0)
-   {
-#ifdef PNG_WARNINGS_SUPPORTED
-      size_t pos = 0;
-      char m[128];
-
-      pos = png_safecat(m, (sizeof m), pos,
-          "Application built with libpng-");
-      pos = png_safecat(m, (sizeof m), pos, user_png_ver);
-      pos = png_safecat(m, (sizeof m), pos, " but running with ");
-      pos = png_safecat(m, (sizeof m), pos, PNG_LIBPNG_VER_STRING);
-      PNG_UNUSED(pos)
-
-      png_warning(png_ptr, m);
-#endif
-
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-      png_ptr->flags = 0;
-#endif
-
-      return 0;
-   }
-
-   /* Success return. */
-   return 1;
-}
-
-/* Generic function to create a png_struct for either read or write - this
- * contains the common initialization.
- */
-PNG_FUNCTION(png_structp /* PRIVATE */,
-png_create_png_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
-{
-   png_struct create_struct;
-#  ifdef PNG_SETJMP_SUPPORTED
-      jmp_buf create_jmp_buf;
-#  endif
-
-   /* This temporary stack-allocated structure is used to provide a place to
-    * build enough context to allow the user provided memory allocator (if any)
-    * to be called.
-    */
-   memset(&create_struct, 0, (sizeof create_struct));
-
-   /* Added at libpng-1.2.6 */
-#  ifdef PNG_USER_LIMITS_SUPPORTED
-      create_struct.user_width_max = PNG_USER_WIDTH_MAX;
-      create_struct.user_height_max = PNG_USER_HEIGHT_MAX;
-
-#     ifdef PNG_USER_CHUNK_CACHE_MAX
-      /* Added at libpng-1.2.43 and 1.4.0 */
-      create_struct.user_chunk_cache_max = PNG_USER_CHUNK_CACHE_MAX;
-#     endif
-
-#     ifdef PNG_USER_CHUNK_MALLOC_MAX
-      /* Added at libpng-1.2.43 and 1.4.1, required only for read but exists
-       * in png_struct regardless.
-       */
-      create_struct.user_chunk_malloc_max = PNG_USER_CHUNK_MALLOC_MAX;
-#     endif
-#  endif
-
-   /* The following two API calls simply set fields in png_struct, so it is safe
-    * to do them now even though error handling is not yet set up.
-    */
-#  ifdef PNG_USER_MEM_SUPPORTED
-      png_set_mem_fn(&create_struct, mem_ptr, malloc_fn, free_fn);
-#  else
-      PNG_UNUSED(mem_ptr)
-      PNG_UNUSED(malloc_fn)
-      PNG_UNUSED(free_fn)
-#  endif
-
-   /* (*error_fn) can return control to the caller after the error_ptr is set,
-    * this will result in a memory leak unless the error_fn does something
-    * extremely sophisticated.  The design lacks merit but is implicit in the
-    * API.
-    */
-   png_set_error_fn(&create_struct, error_ptr, error_fn, warn_fn);
-
-#  ifdef PNG_SETJMP_SUPPORTED
-      if (!setjmp(create_jmp_buf))
-#  endif
-      {
-#  ifdef PNG_SETJMP_SUPPORTED
-         /* Temporarily fake out the longjmp information until we have
-          * successfully completed this function.  This only works if we have
-          * setjmp() support compiled in, but it is safe - this stuff should
-          * never happen.
-          */
-         create_struct.jmp_buf_ptr = &create_jmp_buf;
-         create_struct.jmp_buf_size = 0; /*stack allocation*/
-         create_struct.longjmp_fn = longjmp;
-#  endif
-         /* Call the general version checker (shared with read and write code):
-          */
-         if (png_user_version_check(&create_struct, user_png_ver) != 0)
-         {
-            png_structrp png_ptr = png_voidcast(png_structrp,
-                png_malloc_warn(&create_struct, (sizeof *png_ptr)));
-
-            if (png_ptr != NULL)
-            {
-               /* png_ptr->zstream holds a back-pointer to the png_struct, so
-                * this can only be done now:
-                */
-               create_struct.zstream.zalloc = png_zalloc;
-               create_struct.zstream.zfree = png_zfree;
-               create_struct.zstream.opaque = png_ptr;
-
-#              ifdef PNG_SETJMP_SUPPORTED
-               /* Eliminate the local error handling: */
-               create_struct.jmp_buf_ptr = NULL;
-               create_struct.jmp_buf_size = 0;
-               create_struct.longjmp_fn = 0;
-#              endif
-
-               *png_ptr = create_struct;
-
-               /* This is the successful return point */
-               return png_ptr;
-            }
-         }
-      }
-
-   /* A longjmp because of a bug in the application storage allocator or a
-    * simple failure to allocate the png_struct.
-    */
-   return NULL;
-}
-
-/* Allocate the memory for an info_struct for the application. */
-PNG_FUNCTION(png_infop,PNGAPI
-png_create_info_struct,(png_const_structrp png_ptr),PNG_ALLOCATED)
-{
-   png_inforp info_ptr;
-
-   png_debug(1, "in png_create_info_struct");
-
-   if (png_ptr == NULL)
-      return NULL;
-
-   /* Use the internal API that does not (or at least should not) error out, so
-    * that this call always returns ok.  The application typically sets up the
-    * error handling *after* creating the info_struct because this is the way it
-    * has always been done in 'example.c'.
-    */
-   info_ptr = png_voidcast(png_inforp, png_malloc_base(png_ptr,
-       (sizeof *info_ptr)));
-
-   if (info_ptr != NULL)
-      memset(info_ptr, 0, (sizeof *info_ptr));
-
-   return info_ptr;
-}
-
-/* This function frees the memory associated with a single info struct.
- * Normally, one would use either png_destroy_read_struct() or
- * png_destroy_write_struct() to free an info struct, but this may be
- * useful for some applications.  From libpng 1.6.0 this function is also used
- * internally to implement the png_info release part of the 'struct' destroy
- * APIs.  This ensures that all possible approaches free the same data (all of
- * it).
- */
-void PNGAPI
-png_destroy_info_struct(png_const_structrp png_ptr, png_infopp info_ptr_ptr)
-{
-   png_inforp info_ptr = NULL;
-
-   png_debug(1, "in png_destroy_info_struct");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (info_ptr_ptr != NULL)
-      info_ptr = *info_ptr_ptr;
-
-   if (info_ptr != NULL)
-   {
-      /* Do this first in case of an error below; if the app implements its own
-       * memory management this can lead to png_free calling png_error, which
-       * will abort this routine and return control to the app error handler.
-       * An infinite loop may result if it then tries to free the same info
-       * ptr.
-       */
-      *info_ptr_ptr = NULL;
-
-      png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
-      memset(info_ptr, 0, (sizeof *info_ptr));
-      png_free(png_ptr, info_ptr);
-   }
-}
-
-/* Initialize the info structure.  This is now an internal function (0.89)
- * and applications using it are urged to use png_create_info_struct()
- * instead.  Use deprecated in 1.6.0, internal use removed (used internally it
- * is just a memset).
- *
- * NOTE: it is almost inconceivable that this API is used because it bypasses
- * the user-memory mechanism and the user error handling/warning mechanisms in
- * those cases where it does anything other than a memset.
- */
-PNG_FUNCTION(void,PNGAPI
-png_info_init_3,(png_infopp ptr_ptr, size_t png_info_struct_size),
-    PNG_DEPRECATED)
-{
-   png_inforp info_ptr = *ptr_ptr;
-
-   png_debug(1, "in png_info_init_3");
-
-   if (info_ptr == NULL)
-      return;
-
-   if ((sizeof (png_info)) > png_info_struct_size)
-   {
-      *ptr_ptr = NULL;
-      /* The following line is why this API should not be used: */
-      free(info_ptr);
-      info_ptr = png_voidcast(png_inforp, png_malloc_base(NULL,
-          (sizeof *info_ptr)));
-      if (info_ptr == NULL)
-         return;
-      *ptr_ptr = info_ptr;
-   }
-
-   /* Set everything to 0 */
-   memset(info_ptr, 0, (sizeof *info_ptr));
-}
-
-void PNGAPI
-png_data_freer(png_const_structrp png_ptr, png_inforp info_ptr,
-    int freer, png_uint_32 mask)
-{
-   png_debug(1, "in png_data_freer");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (freer == PNG_DESTROY_WILL_FREE_DATA)
-      info_ptr->free_me |= mask;
-
-   else if (freer == PNG_USER_WILL_FREE_DATA)
-      info_ptr->free_me &= ~mask;
-
-   else
-      png_error(png_ptr, "Unknown freer parameter in png_data_freer");
-}
-
-void PNGAPI
-png_free_data(png_const_structrp png_ptr, png_inforp info_ptr, png_uint_32 mask,
-    int num)
-{
-   png_debug(1, "in png_free_data");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* Free text item num or (if num == -1) all text items */
-   if (info_ptr->text != NULL &&
-       ((mask & PNG_FREE_TEXT) & info_ptr->free_me) != 0)
-   {
-      if (num != -1)
-      {
-         png_free(png_ptr, info_ptr->text[num].key);
-         info_ptr->text[num].key = NULL;
-      }
-
-      else
-      {
-         int i;
-
-         for (i = 0; i < info_ptr->num_text; i++)
-            png_free(png_ptr, info_ptr->text[i].key);
-
-         png_free(png_ptr, info_ptr->text);
-         info_ptr->text = NULL;
-         info_ptr->num_text = 0;
-         info_ptr->max_text = 0;
-      }
-   }
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-   /* Free any tRNS entry */
-   if (((mask & PNG_FREE_TRNS) & info_ptr->free_me) != 0)
-   {
-      info_ptr->valid &= ~PNG_INFO_tRNS;
-      png_free(png_ptr, info_ptr->trans_alpha);
-      info_ptr->trans_alpha = NULL;
-      info_ptr->num_trans = 0;
-   }
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-   /* Free any sCAL entry */
-   if (((mask & PNG_FREE_SCAL) & info_ptr->free_me) != 0)
-   {
-      png_free(png_ptr, info_ptr->scal_s_width);
-      png_free(png_ptr, info_ptr->scal_s_height);
-      info_ptr->scal_s_width = NULL;
-      info_ptr->scal_s_height = NULL;
-      info_ptr->valid &= ~PNG_INFO_sCAL;
-   }
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   /* Free any pCAL entry */
-   if (((mask & PNG_FREE_PCAL) & info_ptr->free_me) != 0)
-   {
-      png_free(png_ptr, info_ptr->pcal_purpose);
-      png_free(png_ptr, info_ptr->pcal_units);
-      info_ptr->pcal_purpose = NULL;
-      info_ptr->pcal_units = NULL;
-
-      if (info_ptr->pcal_params != NULL)
-         {
-            int i;
-
-            for (i = 0; i < info_ptr->pcal_nparams; i++)
-               png_free(png_ptr, info_ptr->pcal_params[i]);
-
-            png_free(png_ptr, info_ptr->pcal_params);
-            info_ptr->pcal_params = NULL;
-         }
-      info_ptr->valid &= ~PNG_INFO_pCAL;
-   }
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   /* Free any profile entry */
-   if (((mask & PNG_FREE_ICCP) & info_ptr->free_me) != 0)
-   {
-      png_free(png_ptr, info_ptr->iccp_name);
-      png_free(png_ptr, info_ptr->iccp_profile);
-      info_ptr->iccp_name = NULL;
-      info_ptr->iccp_profile = NULL;
-      info_ptr->valid &= ~PNG_INFO_iCCP;
-   }
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   /* Free a given sPLT entry, or (if num == -1) all sPLT entries */
-   if (info_ptr->splt_palettes != NULL &&
-       ((mask & PNG_FREE_SPLT) & info_ptr->free_me) != 0)
-   {
-      if (num != -1)
-      {
-         png_free(png_ptr, info_ptr->splt_palettes[num].name);
-         png_free(png_ptr, info_ptr->splt_palettes[num].entries);
-         info_ptr->splt_palettes[num].name = NULL;
-         info_ptr->splt_palettes[num].entries = NULL;
-      }
-
-      else
-      {
-         int i;
-
-         for (i = 0; i < info_ptr->splt_palettes_num; i++)
-         {
-            png_free(png_ptr, info_ptr->splt_palettes[i].name);
-            png_free(png_ptr, info_ptr->splt_palettes[i].entries);
-         }
-
-         png_free(png_ptr, info_ptr->splt_palettes);
-         info_ptr->splt_palettes = NULL;
-         info_ptr->splt_palettes_num = 0;
-         info_ptr->valid &= ~PNG_INFO_sPLT;
-      }
-   }
-#endif
-
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-   if (info_ptr->unknown_chunks != NULL &&
-       ((mask & PNG_FREE_UNKN) & info_ptr->free_me) != 0)
-   {
-      if (num != -1)
-      {
-          png_free(png_ptr, info_ptr->unknown_chunks[num].data);
-          info_ptr->unknown_chunks[num].data = NULL;
-      }
-
-      else
-      {
-         int i;
-
-         for (i = 0; i < info_ptr->unknown_chunks_num; i++)
-            png_free(png_ptr, info_ptr->unknown_chunks[i].data);
-
-         png_free(png_ptr, info_ptr->unknown_chunks);
-         info_ptr->unknown_chunks = NULL;
-         info_ptr->unknown_chunks_num = 0;
-      }
-   }
-#endif
-
-#ifdef PNG_eXIf_SUPPORTED
-   /* Free any eXIf entry */
-   if (((mask & PNG_FREE_EXIF) & info_ptr->free_me) != 0)
-   {
-# ifdef PNG_READ_eXIf_SUPPORTED
-      if (info_ptr->eXIf_buf)
-      {
-         png_free(png_ptr, info_ptr->eXIf_buf);
-         info_ptr->eXIf_buf = NULL;
-      }
-# endif
-      if (info_ptr->exif)
-      {
-         png_free(png_ptr, info_ptr->exif);
-         info_ptr->exif = NULL;
-      }
-      info_ptr->valid &= ~PNG_INFO_eXIf;
-   }
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   /* Free any hIST entry */
-   if (((mask & PNG_FREE_HIST) & info_ptr->free_me) != 0)
-   {
-      png_free(png_ptr, info_ptr->hist);
-      info_ptr->hist = NULL;
-      info_ptr->valid &= ~PNG_INFO_hIST;
-   }
-#endif
-
-   /* Free any PLTE entry that was internally allocated */
-   if (((mask & PNG_FREE_PLTE) & info_ptr->free_me) != 0)
-   {
-      png_free(png_ptr, info_ptr->palette);
-      info_ptr->palette = NULL;
-      info_ptr->valid &= ~PNG_INFO_PLTE;
-      info_ptr->num_palette = 0;
-   }
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* Free any image bits attached to the info structure */
-   if (((mask & PNG_FREE_ROWS) & info_ptr->free_me) != 0)
-   {
-      if (info_ptr->row_pointers != NULL)
-      {
-         png_uint_32 row;
-         for (row = 0; row < info_ptr->height; row++)
-            png_free(png_ptr, info_ptr->row_pointers[row]);
-
-         png_free(png_ptr, info_ptr->row_pointers);
-         info_ptr->row_pointers = NULL;
-      }
-      info_ptr->valid &= ~PNG_INFO_IDAT;
-   }
-#endif
-
-   if (num != -1)
-      mask &= ~PNG_FREE_MUL;
-
-   info_ptr->free_me &= ~mask;
-}
-#endif /* READ || WRITE */
-
-/* This function returns a pointer to the io_ptr associated with the user
- * functions.  The application should free any memory associated with this
- * pointer before png_write_destroy() or png_read_destroy() are called.
- */
-png_voidp PNGAPI
-png_get_io_ptr(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return NULL;
-
-   return png_ptr->io_ptr;
-}
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-#  ifdef PNG_STDIO_SUPPORTED
-/* Initialize the default input/output functions for the PNG file.  If you
- * use your own read or write routines, you can call either png_set_read_fn()
- * or png_set_write_fn() instead of png_init_io().  If you have defined
- * PNG_NO_STDIO or otherwise disabled PNG_STDIO_SUPPORTED, you must use a
- * function of your own because "FILE *" isn't necessarily available.
- */
-void PNGAPI
-png_init_io(png_structrp png_ptr, png_FILE_p fp)
-{
-   png_debug(1, "in png_init_io");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->io_ptr = (png_voidp)fp;
-}
-#  endif
-
-#  ifdef PNG_SAVE_INT_32_SUPPORTED
-/* PNG signed integers are saved in 32-bit 2's complement format.  ANSI C-90
- * defines a cast of a signed integer to an unsigned integer either to preserve
- * the value, if it is positive, or to calculate:
- *
- *     (UNSIGNED_MAX+1) + integer
- *
- * Where UNSIGNED_MAX is the appropriate maximum unsigned value, so when the
- * negative integral value is added the result will be an unsigned value
- * corresponding to the 2's complement representation.
- */
-void PNGAPI
-png_save_int_32(png_bytep buf, png_int_32 i)
-{
-   png_save_uint_32(buf, (png_uint_32)i);
-}
-#  endif
-
-#  ifdef PNG_TIME_RFC1123_SUPPORTED
-/* Convert the supplied time into an RFC 1123 string suitable for use in
- * a "Creation Time" or other text-based time string.
- */
-int PNGAPI
-png_convert_to_rfc1123_buffer(char out[29], png_const_timep ptime)
-{
-   static const char short_months[12][4] =
-        {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
-         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
-
-   if (out == NULL)
-      return 0;
-
-   if (ptime->year > 9999 /* RFC1123 limitation */ ||
-       ptime->month == 0    ||  ptime->month > 12  ||
-       ptime->day   == 0    ||  ptime->day   > 31  ||
-       ptime->hour  > 23    ||  ptime->minute > 59 ||
-       ptime->second > 60)
-      return 0;
-
-   {
-      size_t pos = 0;
-      char number_buf[5] = {0, 0, 0, 0, 0}; /* enough for a four-digit year */
-
-#     define APPEND_STRING(string) pos = png_safecat(out, 29, pos, (string))
-#     define APPEND_NUMBER(format, value)\
-         APPEND_STRING(PNG_FORMAT_NUMBER(number_buf, format, (value)))
-#     define APPEND(ch) if (pos < 28) out[pos++] = (ch)
-
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, (unsigned)ptime->day);
-      APPEND(' ');
-      APPEND_STRING(short_months[(ptime->month - 1)]);
-      APPEND(' ');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, ptime->year);
-      APPEND(' ');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->hour);
-      APPEND(':');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->minute);
-      APPEND(':');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->second);
-      APPEND_STRING(" +0000"); /* This reliably terminates the buffer */
-      PNG_UNUSED (pos)
-
-#     undef APPEND
-#     undef APPEND_NUMBER
-#     undef APPEND_STRING
-   }
-
-   return 1;
-}
-
-#    if PNG_LIBPNG_VER < 10700
-/* To do: remove the following from libpng-1.7 */
-/* Original API that uses a private buffer in png_struct.
- * Deprecated because it causes png_struct to carry a spurious temporary
- * buffer (png_struct::time_buffer), better to have the caller pass this in.
- */
-png_const_charp PNGAPI
-png_convert_to_rfc1123(png_structrp png_ptr, png_const_timep ptime)
-{
-   if (png_ptr != NULL)
-   {
-      /* The only failure above if png_ptr != NULL is from an invalid ptime */
-      if (png_convert_to_rfc1123_buffer(png_ptr->time_buffer, ptime) == 0)
-         png_warning(png_ptr, "Ignoring invalid time value");
-
-      else
-         return png_ptr->time_buffer;
-   }
-
-   return NULL;
-}
-#    endif /* LIBPNG_VER < 10700 */
-#  endif /* TIME_RFC1123 */
-
-#endif /* READ || WRITE */
-
-png_const_charp PNGAPI
-png_get_copyright(png_const_structrp png_ptr)
-{
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-#ifdef PNG_STRING_COPYRIGHT
-   return PNG_STRING_COPYRIGHT
-#else
-   return PNG_STRING_NEWLINE \
-      "libpng version 1.6.43" PNG_STRING_NEWLINE \
-      "Copyright (c) 2018-2024 Cosmin Truta" PNG_STRING_NEWLINE \
-      "Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson" \
-      PNG_STRING_NEWLINE \
-      "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \
-      "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \
-      PNG_STRING_NEWLINE;
-#endif
-}
-
-/* The following return the library version as a short string in the
- * format 1.0.0 through 99.99.99zz.  To get the version of *.h files
- * used with your application, print out PNG_LIBPNG_VER_STRING, which
- * is defined in png.h.
- * Note: now there is no difference between png_get_libpng_ver() and
- * png_get_header_ver().  Due to the version_nn_nn_nn typedef guard,
- * it is guaranteed that png.c uses the correct version of png.h.
- */
-png_const_charp PNGAPI
-png_get_libpng_ver(png_const_structrp png_ptr)
-{
-   /* Version of *.c files used when building libpng */
-   return png_get_header_ver(png_ptr);
-}
-
-png_const_charp PNGAPI
-png_get_header_ver(png_const_structrp png_ptr)
-{
-   /* Version of *.h files used when building libpng */
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-   return PNG_LIBPNG_VER_STRING;
-}
-
-png_const_charp PNGAPI
-png_get_header_version(png_const_structrp png_ptr)
-{
-   /* Returns longer string containing both version and date */
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-#ifdef __STDC__
-   return PNG_HEADER_VERSION_STRING
-#  ifndef PNG_READ_SUPPORTED
-      " (NO READ SUPPORT)"
-#  endif
-      PNG_STRING_NEWLINE;
-#else
-   return PNG_HEADER_VERSION_STRING;
-#endif
-}
-
-#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-/* NOTE: this routine is not used internally! */
-/* Build a grayscale palette.  Palette is assumed to be 1 << bit_depth
- * large of png_color.  This lets grayscale images be treated as
- * paletted.  Most useful for gamma correction and simplification
- * of code.  This API is not used internally.
- */
-void PNGAPI
-png_build_grayscale_palette(int bit_depth, png_colorp palette)
-{
-   int num_palette;
-   int color_inc;
-   int i;
-   int v;
-
-   png_debug(1, "in png_do_build_grayscale_palette");
-
-   if (palette == NULL)
-      return;
-
-   switch (bit_depth)
-   {
-      case 1:
-         num_palette = 2;
-         color_inc = 0xff;
-         break;
-
-      case 2:
-         num_palette = 4;
-         color_inc = 0x55;
-         break;
-
-      case 4:
-         num_palette = 16;
-         color_inc = 0x11;
-         break;
-
-      case 8:
-         num_palette = 256;
-         color_inc = 1;
-         break;
-
-      default:
-         num_palette = 0;
-         color_inc = 0;
-         break;
-   }
-
-   for (i = 0, v = 0; i < num_palette; i++, v += color_inc)
-   {
-      palette[i].red = (png_byte)(v & 0xff);
-      palette[i].green = (png_byte)(v & 0xff);
-      palette[i].blue = (png_byte)(v & 0xff);
-   }
-}
-#endif
-
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-int PNGAPI
-png_handle_as_unknown(png_const_structrp png_ptr, png_const_bytep chunk_name)
-{
-   /* Check chunk_name and return "keep" value if it's on the list, else 0 */
-   png_const_bytep p, p_end;
-
-   if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list == 0)
-      return PNG_HANDLE_CHUNK_AS_DEFAULT;
-
-   p_end = png_ptr->chunk_list;
-   p = p_end + png_ptr->num_chunk_list*5; /* beyond end */
-
-   /* The code is the fifth byte after each four byte string.  Historically this
-    * code was always searched from the end of the list, this is no longer
-    * necessary because the 'set' routine handles duplicate entries correctly.
-    */
-   do /* num_chunk_list > 0, so at least one */
-   {
-      p -= 5;
-
-      if (memcmp(chunk_name, p, 4) == 0)
-         return p[4];
-   }
-   while (p > p_end);
-
-   /* This means that known chunks should be processed and unknown chunks should
-    * be handled according to the value of png_ptr->unknown_default; this can be
-    * confusing because, as a result, there are two levels of defaulting for
-    * unknown chunks.
-    */
-   return PNG_HANDLE_CHUNK_AS_DEFAULT;
-}
-
-#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\
-   defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
-int /* PRIVATE */
-png_chunk_unknown_handling(png_const_structrp png_ptr, png_uint_32 chunk_name)
-{
-   png_byte chunk_string[5];
-
-   PNG_CSTRING_FROM_CHUNK(chunk_string, chunk_name);
-   return png_handle_as_unknown(png_ptr, chunk_string);
-}
-#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */
-#endif /* SET_UNKNOWN_CHUNKS */
-
-#ifdef PNG_READ_SUPPORTED
-/* This function, added to libpng-1.0.6g, is untested. */
-int PNGAPI
-png_reset_zstream(png_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return Z_STREAM_ERROR;
-
-   /* WARNING: this resets the window bits to the maximum! */
-   return inflateReset(&png_ptr->zstream);
-}
-#endif /* READ */
-
-/* This function was added to libpng-1.0.7 */
-png_uint_32 PNGAPI
-png_access_version_number(void)
-{
-   /* Version of *.c files used when building libpng */
-   return (png_uint_32)PNG_LIBPNG_VER;
-}
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-/* Ensure that png_ptr->zstream.msg holds some appropriate error message string.
- * If it doesn't 'ret' is used to set it to something appropriate, even in cases
- * like Z_OK or Z_STREAM_END where the error code is apparently a success code.
- */
-void /* PRIVATE */
-png_zstream_error(png_structrp png_ptr, int ret)
-{
-   /* Translate 'ret' into an appropriate error string, priority is given to the
-    * one in zstream if set.  This always returns a string, even in cases like
-    * Z_OK or Z_STREAM_END where the error code is a success code.
-    */
-   if (png_ptr->zstream.msg == NULL) switch (ret)
-   {
-      default:
-      case Z_OK:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return code");
-         break;
-
-      case Z_STREAM_END:
-         /* Normal exit */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected end of LZ stream");
-         break;
-
-      case Z_NEED_DICT:
-         /* This means the deflate stream did not have a dictionary; this
-          * indicates a bogus PNG.
-          */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("missing LZ dictionary");
-         break;
-
-      case Z_ERRNO:
-         /* gz APIs only: should not happen */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("zlib IO error");
-         break;
-
-      case Z_STREAM_ERROR:
-         /* internal libpng error */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("bad parameters to zlib");
-         break;
-
-      case Z_DATA_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("damaged LZ stream");
-         break;
-
-      case Z_MEM_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("insufficient memory");
-         break;
-
-      case Z_BUF_ERROR:
-         /* End of input or output; not a problem if the caller is doing
-          * incremental read or write.
-          */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("truncated");
-         break;
-
-      case Z_VERSION_ERROR:
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unsupported zlib version");
-         break;
-
-      case PNG_UNEXPECTED_ZLIB_RETURN:
-         /* Compile errors here mean that zlib now uses the value co-opted in
-          * pngpriv.h for PNG_UNEXPECTED_ZLIB_RETURN; update the switch above
-          * and change pngpriv.h.  Note that this message is "... return",
-          * whereas the default/Z_OK one is "... return code".
-          */
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return");
-         break;
-   }
-}
-
-/* png_convert_size: a PNGAPI but no longer in png.h, so deleted
- * at libpng 1.5.5!
- */
-
-/* Added at libpng version 1.2.34 and 1.4.0 (moved from pngset.c) */
-#ifdef PNG_GAMMA_SUPPORTED /* always set if COLORSPACE */
-static int
-png_colorspace_check_gamma(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, png_fixed_point gAMA, int from)
-   /* This is called to check a new gamma value against an existing one.  The
-    * routine returns false if the new gamma value should not be written.
-    *
-    * 'from' says where the new gamma value comes from:
-    *
-    *    0: the new gamma value is the libpng estimate for an ICC profile
-    *    1: the new gamma value comes from a gAMA chunk
-    *    2: the new gamma value comes from an sRGB chunk
-    */
-{
-   png_fixed_point gtest;
-
-   if ((colorspace->flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
-       (png_muldiv(&gtest, colorspace->gamma, PNG_FP_1, gAMA) == 0  ||
-      png_gamma_significant(gtest) != 0))
-   {
-      /* Either this is an sRGB image, in which case the calculated gamma
-       * approximation should match, or this is an image with a profile and the
-       * value libpng calculates for the gamma of the profile does not match the
-       * value recorded in the file.  The former, sRGB, case is an error, the
-       * latter is just a warning.
-       */
-      if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0 || from == 2)
-      {
-         png_chunk_report(png_ptr, "gamma value does not match sRGB",
-             PNG_CHUNK_ERROR);
-         /* Do not overwrite an sRGB value */
-         return from == 2;
-      }
-
-      else /* sRGB tag not involved */
-      {
-         png_chunk_report(png_ptr, "gamma value does not match libpng estimate",
-             PNG_CHUNK_WARNING);
-         return from == 1;
-      }
-   }
-
-   return 1;
-}
-
-void /* PRIVATE */
-png_colorspace_set_gamma(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, png_fixed_point gAMA)
-{
-   /* Changed in libpng-1.5.4 to limit the values to ensure overflow can't
-    * occur.  Since the fixed point representation is asymmetrical it is
-    * possible for 1/gamma to overflow the limit of 21474 and this means the
-    * gamma value must be at least 5/100000 and hence at most 20000.0.  For
-    * safety the limits here are a little narrower.  The values are 0.00016 to
-    * 6250.0, which are truly ridiculous gamma values (and will produce
-    * displays that are all black or all white.)
-    *
-    * In 1.6.0 this test replaces the ones in pngrutil.c, in the gAMA chunk
-    * handling code, which only required the value to be >0.
-    */
-   png_const_charp errmsg;
-
-   if (gAMA < 16 || gAMA > 625000000)
-      errmsg = "gamma value out of range";
-
-#  ifdef PNG_READ_gAMA_SUPPORTED
-   /* Allow the application to set the gamma value more than once */
-   else if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
-      (colorspace->flags & PNG_COLORSPACE_FROM_gAMA) != 0)
-      errmsg = "duplicate";
-#  endif
-
-   /* Do nothing if the colorspace is already invalid */
-   else if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
-      return;
-
-   else
-   {
-      if (png_colorspace_check_gamma(png_ptr, colorspace, gAMA,
-          1/*from gAMA*/) != 0)
-      {
-         /* Store this gamma value. */
-         colorspace->gamma = gAMA;
-         colorspace->flags |=
-            (PNG_COLORSPACE_HAVE_GAMMA | PNG_COLORSPACE_FROM_gAMA);
-      }
-
-      /* At present if the check_gamma test fails the gamma of the colorspace is
-       * not updated however the colorspace is not invalidated.  This
-       * corresponds to the case where the existing gamma comes from an sRGB
-       * chunk or profile.  An error message has already been output.
-       */
-      return;
-   }
-
-   /* Error exit - errmsg has been set. */
-   colorspace->flags |= PNG_COLORSPACE_INVALID;
-   png_chunk_report(png_ptr, errmsg, PNG_CHUNK_WRITE_ERROR);
-}
-
-void /* PRIVATE */
-png_colorspace_sync_info(png_const_structrp png_ptr, png_inforp info_ptr)
-{
-   if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
-   {
-      /* Everything is invalid */
-      info_ptr->valid &= ~(PNG_INFO_gAMA|PNG_INFO_cHRM|PNG_INFO_sRGB|
-         PNG_INFO_iCCP);
-
-#     ifdef PNG_COLORSPACE_SUPPORTED
-      /* Clean up the iCCP profile now if it won't be used. */
-      png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, -1/*not used*/);
-#     else
-      PNG_UNUSED(png_ptr)
-#     endif
-   }
-
-   else
-   {
-#     ifdef PNG_COLORSPACE_SUPPORTED
-      /* Leave the INFO_iCCP flag set if the pngset.c code has already set
-       * it; this allows a PNG to contain a profile which matches sRGB and
-       * yet still have that profile retrievable by the application.
-       */
-      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_MATCHES_sRGB) != 0)
-         info_ptr->valid |= PNG_INFO_sRGB;
-
-      else
-         info_ptr->valid &= ~PNG_INFO_sRGB;
-
-      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-         info_ptr->valid |= PNG_INFO_cHRM;
-
-      else
-         info_ptr->valid &= ~PNG_INFO_cHRM;
-#     endif
-
-      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0)
-         info_ptr->valid |= PNG_INFO_gAMA;
-
-      else
-         info_ptr->valid &= ~PNG_INFO_gAMA;
-   }
-}
-
-#ifdef PNG_READ_SUPPORTED
-void /* PRIVATE */
-png_colorspace_sync(png_const_structrp png_ptr, png_inforp info_ptr)
-{
-   if (info_ptr == NULL) /* reduce code size; check here not in the caller */
-      return;
-
-   info_ptr->colorspace = png_ptr->colorspace;
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-#endif
-#endif /* GAMMA */
-
-#ifdef PNG_COLORSPACE_SUPPORTED
-/* Added at libpng-1.5.5 to support read and write of true CIEXYZ values for
- * cHRM, as opposed to using chromaticities.  These internal APIs return
- * non-zero on a parameter error.  The X, Y and Z values are required to be
- * positive and less than 1.0.
- */
-static int
-png_xy_from_XYZ(png_xy *xy, const png_XYZ *XYZ)
-{
-   png_int_32 d, dwhite, whiteX, whiteY;
-
-   d = XYZ->red_X + XYZ->red_Y + XYZ->red_Z;
-   if (png_muldiv(&xy->redx, XYZ->red_X, PNG_FP_1, d) == 0)
-      return 1;
-   if (png_muldiv(&xy->redy, XYZ->red_Y, PNG_FP_1, d) == 0)
-      return 1;
-   dwhite = d;
-   whiteX = XYZ->red_X;
-   whiteY = XYZ->red_Y;
-
-   d = XYZ->green_X + XYZ->green_Y + XYZ->green_Z;
-   if (png_muldiv(&xy->greenx, XYZ->green_X, PNG_FP_1, d) == 0)
-      return 1;
-   if (png_muldiv(&xy->greeny, XYZ->green_Y, PNG_FP_1, d) == 0)
-      return 1;
-   dwhite += d;
-   whiteX += XYZ->green_X;
-   whiteY += XYZ->green_Y;
-
-   d = XYZ->blue_X + XYZ->blue_Y + XYZ->blue_Z;
-   if (png_muldiv(&xy->bluex, XYZ->blue_X, PNG_FP_1, d) == 0)
-      return 1;
-   if (png_muldiv(&xy->bluey, XYZ->blue_Y, PNG_FP_1, d) == 0)
-      return 1;
-   dwhite += d;
-   whiteX += XYZ->blue_X;
-   whiteY += XYZ->blue_Y;
-
-   /* The reference white is simply the sum of the end-point (X,Y,Z) vectors,
-    * thus:
-    */
-   if (png_muldiv(&xy->whitex, whiteX, PNG_FP_1, dwhite) == 0)
-      return 1;
-   if (png_muldiv(&xy->whitey, whiteY, PNG_FP_1, dwhite) == 0)
-      return 1;
-
-   return 0;
-}
-
-static int
-png_XYZ_from_xy(png_XYZ *XYZ, const png_xy *xy)
-{
-   png_fixed_point red_inverse, green_inverse, blue_scale;
-   png_fixed_point left, right, denominator;
-
-   /* Check xy and, implicitly, z.  Note that wide gamut color spaces typically
-    * have end points with 0 tristimulus values (these are impossible end
-    * points, but they are used to cover the possible colors).  We check
-    * xy->whitey against 5, not 0, to avoid a possible integer overflow.
-    */
-   if (xy->redx   < 0 || xy->redx > PNG_FP_1) return 1;
-   if (xy->redy   < 0 || xy->redy > PNG_FP_1-xy->redx) return 1;
-   if (xy->greenx < 0 || xy->greenx > PNG_FP_1) return 1;
-   if (xy->greeny < 0 || xy->greeny > PNG_FP_1-xy->greenx) return 1;
-   if (xy->bluex  < 0 || xy->bluex > PNG_FP_1) return 1;
-   if (xy->bluey  < 0 || xy->bluey > PNG_FP_1-xy->bluex) return 1;
-   if (xy->whitex < 0 || xy->whitex > PNG_FP_1) return 1;
-   if (xy->whitey < 5 || xy->whitey > PNG_FP_1-xy->whitex) return 1;
-
-   /* The reverse calculation is more difficult because the original tristimulus
-    * value had 9 independent values (red,green,blue)x(X,Y,Z) however only 8
-    * derived values were recorded in the cHRM chunk;
-    * (red,green,blue,white)x(x,y).  This loses one degree of freedom and
-    * therefore an arbitrary ninth value has to be introduced to undo the
-    * original transformations.
-    *
-    * Think of the original end-points as points in (X,Y,Z) space.  The
-    * chromaticity values (c) have the property:
-    *
-    *           C
-    *   c = ---------
-    *       X + Y + Z
-    *
-    * For each c (x,y,z) from the corresponding original C (X,Y,Z).  Thus the
-    * three chromaticity values (x,y,z) for each end-point obey the
-    * relationship:
-    *
-    *   x + y + z = 1
-    *
-    * This describes the plane in (X,Y,Z) space that intersects each axis at the
-    * value 1.0; call this the chromaticity plane.  Thus the chromaticity
-    * calculation has scaled each end-point so that it is on the x+y+z=1 plane
-    * and chromaticity is the intersection of the vector from the origin to the
-    * (X,Y,Z) value with the chromaticity plane.
-    *
-    * To fully invert the chromaticity calculation we would need the three
-    * end-point scale factors, (red-scale, green-scale, blue-scale), but these
-    * were not recorded.  Instead we calculated the reference white (X,Y,Z) and
-    * recorded the chromaticity of this.  The reference white (X,Y,Z) would have
-    * given all three of the scale factors since:
-    *
-    *    color-C = color-c * color-scale
-    *    white-C = red-C + green-C + blue-C
-    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
-    *
-    * But cHRM records only white-x and white-y, so we have lost the white scale
-    * factor:
-    *
-    *    white-C = white-c*white-scale
-    *
-    * To handle this the inverse transformation makes an arbitrary assumption
-    * about white-scale:
-    *
-    *    Assume: white-Y = 1.0
-    *    Hence:  white-scale = 1/white-y
-    *    Or:     red-Y + green-Y + blue-Y = 1.0
-    *
-    * Notice the last statement of the assumption gives an equation in three of
-    * the nine values we want to calculate.  8 more equations come from the
-    * above routine as summarised at the top above (the chromaticity
-    * calculation):
-    *
-    *    Given: color-x = color-X / (color-X + color-Y + color-Z)
-    *    Hence: (color-x - 1)*color-X + color.x*color-Y + color.x*color-Z = 0
-    *
-    * This is 9 simultaneous equations in the 9 variables "color-C" and can be
-    * solved by Cramer's rule.  Cramer's rule requires calculating 10 9x9 matrix
-    * determinants, however this is not as bad as it seems because only 28 of
-    * the total of 90 terms in the various matrices are non-zero.  Nevertheless
-    * Cramer's rule is notoriously numerically unstable because the determinant
-    * calculation involves the difference of large, but similar, numbers.  It is
-    * difficult to be sure that the calculation is stable for real world values
-    * and it is certain that it becomes unstable where the end points are close
-    * together.
-    *
-    * So this code uses the perhaps slightly less optimal but more
-    * understandable and totally obvious approach of calculating color-scale.
-    *
-    * This algorithm depends on the precision in white-scale and that is
-    * (1/white-y), so we can immediately see that as white-y approaches 0 the
-    * accuracy inherent in the cHRM chunk drops off substantially.
-    *
-    * libpng arithmetic: a simple inversion of the above equations
-    * ------------------------------------------------------------
-    *
-    *    white_scale = 1/white-y
-    *    white-X = white-x * white-scale
-    *    white-Y = 1.0
-    *    white-Z = (1 - white-x - white-y) * white_scale
-    *
-    *    white-C = red-C + green-C + blue-C
-    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
-    *
-    * This gives us three equations in (red-scale,green-scale,blue-scale) where
-    * all the coefficients are now known:
-    *
-    *    red-x*red-scale + green-x*green-scale + blue-x*blue-scale
-    *       = white-x/white-y
-    *    red-y*red-scale + green-y*green-scale + blue-y*blue-scale = 1
-    *    red-z*red-scale + green-z*green-scale + blue-z*blue-scale
-    *       = (1 - white-x - white-y)/white-y
-    *
-    * In the last equation color-z is (1 - color-x - color-y) so we can add all
-    * three equations together to get an alternative third:
-    *
-    *    red-scale + green-scale + blue-scale = 1/white-y = white-scale
-    *
-    * So now we have a Cramer's rule solution where the determinants are just
-    * 3x3 - far more tractible.  Unfortunately 3x3 determinants still involve
-    * multiplication of three coefficients so we can't guarantee to avoid
-    * overflow in the libpng fixed point representation.  Using Cramer's rule in
-    * floating point is probably a good choice here, but it's not an option for
-    * fixed point.  Instead proceed to simplify the first two equations by
-    * eliminating what is likely to be the largest value, blue-scale:
-    *
-    *    blue-scale = white-scale - red-scale - green-scale
-    *
-    * Hence:
-    *
-    *    (red-x - blue-x)*red-scale + (green-x - blue-x)*green-scale =
-    *                (white-x - blue-x)*white-scale
-    *
-    *    (red-y - blue-y)*red-scale + (green-y - blue-y)*green-scale =
-    *                1 - blue-y*white-scale
-    *
-    * And now we can trivially solve for (red-scale,green-scale):
-    *
-    *    green-scale =
-    *                (white-x - blue-x)*white-scale - (red-x - blue-x)*red-scale
-    *                -----------------------------------------------------------
-    *                                  green-x - blue-x
-    *
-    *    red-scale =
-    *                1 - blue-y*white-scale - (green-y - blue-y) * green-scale
-    *                ---------------------------------------------------------
-    *                                  red-y - blue-y
-    *
-    * Hence:
-    *
-    *    red-scale =
-    *          ( (green-x - blue-x) * (white-y - blue-y) -
-    *            (green-y - blue-y) * (white-x - blue-x) ) / white-y
-    * -------------------------------------------------------------------------
-    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
-    *
-    *    green-scale =
-    *          ( (red-y - blue-y) * (white-x - blue-x) -
-    *            (red-x - blue-x) * (white-y - blue-y) ) / white-y
-    * -------------------------------------------------------------------------
-    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
-    *
-    * Accuracy:
-    * The input values have 5 decimal digits of accuracy.  The values are all in
-    * the range 0 < value < 1, so simple products are in the same range but may
-    * need up to 10 decimal digits to preserve the original precision and avoid
-    * underflow.  Because we are using a 32-bit signed representation we cannot
-    * match this; the best is a little over 9 decimal digits, less than 10.
-    *
-    * The approach used here is to preserve the maximum precision within the
-    * signed representation.  Because the red-scale calculation above uses the
-    * difference between two products of values that must be in the range -1..+1
-    * it is sufficient to divide the product by 7; ceil(100,000/32767*2).  The
-    * factor is irrelevant in the calculation because it is applied to both
-    * numerator and denominator.
-    *
-    * Note that the values of the differences of the products of the
-    * chromaticities in the above equations tend to be small, for example for
-    * the sRGB chromaticities they are:
-    *
-    * red numerator:    -0.04751
-    * green numerator:  -0.08788
-    * denominator:      -0.2241 (without white-y multiplication)
-    *
-    *  The resultant Y coefficients from the chromaticities of some widely used
-    *  color space definitions are (to 15 decimal places):
-    *
-    *  sRGB
-    *    0.212639005871510 0.715168678767756 0.072192315360734
-    *  Kodak ProPhoto
-    *    0.288071128229293 0.711843217810102 0.000085653960605
-    *  Adobe RGB
-    *    0.297344975250536 0.627363566255466 0.075291458493998
-    *  Adobe Wide Gamut RGB
-    *    0.258728243040113 0.724682314948566 0.016589442011321
-    */
-   /* By the argument, above overflow should be impossible here. The return
-    * value of 2 indicates an internal error to the caller.
-    */
-   if (png_muldiv(&left, xy->greenx-xy->bluex, xy->redy - xy->bluey, 7) == 0)
-      return 2;
-   if (png_muldiv(&right, xy->greeny-xy->bluey, xy->redx - xy->bluex, 7) == 0)
-      return 2;
-   denominator = left - right;
-
-   /* Now find the red numerator. */
-   if (png_muldiv(&left, xy->greenx-xy->bluex, xy->whitey-xy->bluey, 7) == 0)
-      return 2;
-   if (png_muldiv(&right, xy->greeny-xy->bluey, xy->whitex-xy->bluex, 7) == 0)
-      return 2;
-
-   /* Overflow is possible here and it indicates an extreme set of PNG cHRM
-    * chunk values.  This calculation actually returns the reciprocal of the
-    * scale value because this allows us to delay the multiplication of white-y
-    * into the denominator, which tends to produce a small number.
-    */
-   if (png_muldiv(&red_inverse, xy->whitey, denominator, left-right) == 0 ||
-       red_inverse <= xy->whitey /* r+g+b scales = white scale */)
-      return 1;
-
-   /* Similarly for green_inverse: */
-   if (png_muldiv(&left, xy->redy-xy->bluey, xy->whitex-xy->bluex, 7) == 0)
-      return 2;
-   if (png_muldiv(&right, xy->redx-xy->bluex, xy->whitey-xy->bluey, 7) == 0)
-      return 2;
-   if (png_muldiv(&green_inverse, xy->whitey, denominator, left-right) == 0 ||
-       green_inverse <= xy->whitey)
-      return 1;
-
-   /* And the blue scale, the checks above guarantee this can't overflow but it
-    * can still produce 0 for extreme cHRM values.
-    */
-   blue_scale = png_reciprocal(xy->whitey) - png_reciprocal(red_inverse) -
-       png_reciprocal(green_inverse);
-   if (blue_scale <= 0)
-      return 1;
-
-
-   /* And fill in the png_XYZ: */
-   if (png_muldiv(&XYZ->red_X, xy->redx, PNG_FP_1, red_inverse) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->red_Y, xy->redy, PNG_FP_1, red_inverse) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->red_Z, PNG_FP_1 - xy->redx - xy->redy, PNG_FP_1,
-       red_inverse) == 0)
-      return 1;
-
-   if (png_muldiv(&XYZ->green_X, xy->greenx, PNG_FP_1, green_inverse) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->green_Y, xy->greeny, PNG_FP_1, green_inverse) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->green_Z, PNG_FP_1 - xy->greenx - xy->greeny, PNG_FP_1,
-       green_inverse) == 0)
-      return 1;
-
-   if (png_muldiv(&XYZ->blue_X, xy->bluex, blue_scale, PNG_FP_1) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->blue_Y, xy->bluey, blue_scale, PNG_FP_1) == 0)
-      return 1;
-   if (png_muldiv(&XYZ->blue_Z, PNG_FP_1 - xy->bluex - xy->bluey, blue_scale,
-       PNG_FP_1) == 0)
-      return 1;
-
-   return 0; /*success*/
-}
-
-static int
-png_XYZ_normalize(png_XYZ *XYZ)
-{
-   png_int_32 Y;
-
-   if (XYZ->red_Y < 0 || XYZ->green_Y < 0 || XYZ->blue_Y < 0 ||
-      XYZ->red_X < 0 || XYZ->green_X < 0 || XYZ->blue_X < 0 ||
-      XYZ->red_Z < 0 || XYZ->green_Z < 0 || XYZ->blue_Z < 0)
-      return 1;
-
-   /* Normalize by scaling so the sum of the end-point Y values is PNG_FP_1.
-    * IMPLEMENTATION NOTE: ANSI requires signed overflow not to occur, therefore
-    * relying on addition of two positive values producing a negative one is not
-    * safe.
-    */
-   Y = XYZ->red_Y;
-   if (0x7fffffff - Y < XYZ->green_X)
-      return 1;
-   Y += XYZ->green_Y;
-   if (0x7fffffff - Y < XYZ->blue_X)
-      return 1;
-   Y += XYZ->blue_Y;
-
-   if (Y != PNG_FP_1)
-   {
-      if (png_muldiv(&XYZ->red_X, XYZ->red_X, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->red_Y, XYZ->red_Y, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->red_Z, XYZ->red_Z, PNG_FP_1, Y) == 0)
-         return 1;
-
-      if (png_muldiv(&XYZ->green_X, XYZ->green_X, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->green_Y, XYZ->green_Y, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->green_Z, XYZ->green_Z, PNG_FP_1, Y) == 0)
-         return 1;
-
-      if (png_muldiv(&XYZ->blue_X, XYZ->blue_X, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->blue_Y, XYZ->blue_Y, PNG_FP_1, Y) == 0)
-         return 1;
-      if (png_muldiv(&XYZ->blue_Z, XYZ->blue_Z, PNG_FP_1, Y) == 0)
-         return 1;
-   }
-
-   return 0;
-}
-
-static int
-png_colorspace_endpoints_match(const png_xy *xy1, const png_xy *xy2, int delta)
-{
-   /* Allow an error of +/-0.01 (absolute value) on each chromaticity */
-   if (PNG_OUT_OF_RANGE(xy1->whitex, xy2->whitex,delta) ||
-       PNG_OUT_OF_RANGE(xy1->whitey, xy2->whitey,delta) ||
-       PNG_OUT_OF_RANGE(xy1->redx,   xy2->redx,  delta) ||
-       PNG_OUT_OF_RANGE(xy1->redy,   xy2->redy,  delta) ||
-       PNG_OUT_OF_RANGE(xy1->greenx, xy2->greenx,delta) ||
-       PNG_OUT_OF_RANGE(xy1->greeny, xy2->greeny,delta) ||
-       PNG_OUT_OF_RANGE(xy1->bluex,  xy2->bluex, delta) ||
-       PNG_OUT_OF_RANGE(xy1->bluey,  xy2->bluey, delta))
-      return 0;
-   return 1;
-}
-
-/* Added in libpng-1.6.0, a different check for the validity of a set of cHRM
- * chunk chromaticities.  Earlier checks used to simply look for the overflow
- * condition (where the determinant of the matrix to solve for XYZ ends up zero
- * because the chromaticity values are not all distinct.)  Despite this it is
- * theoretically possible to produce chromaticities that are apparently valid
- * but that rapidly degrade to invalid, potentially crashing, sets because of
- * arithmetic inaccuracies when calculations are performed on them.  The new
- * check is to round-trip xy -> XYZ -> xy and then check that the result is
- * within a small percentage of the original.
- */
-static int
-png_colorspace_check_xy(png_XYZ *XYZ, const png_xy *xy)
-{
-   int result;
-   png_xy xy_test;
-
-   /* As a side-effect this routine also returns the XYZ endpoints. */
-   result = png_XYZ_from_xy(XYZ, xy);
-   if (result != 0)
-      return result;
-
-   result = png_xy_from_XYZ(&xy_test, XYZ);
-   if (result != 0)
-      return result;
-
-   if (png_colorspace_endpoints_match(xy, &xy_test,
-       5/*actually, the math is pretty accurate*/) != 0)
-      return 0;
-
-   /* Too much slip */
-   return 1;
-}
-
-/* This is the check going the other way.  The XYZ is modified to normalize it
- * (another side-effect) and the xy chromaticities are returned.
- */
-static int
-png_colorspace_check_XYZ(png_xy *xy, png_XYZ *XYZ)
-{
-   int result;
-   png_XYZ XYZtemp;
-
-   result = png_XYZ_normalize(XYZ);
-   if (result != 0)
-      return result;
-
-   result = png_xy_from_XYZ(xy, XYZ);
-   if (result != 0)
-      return result;
-
-   XYZtemp = *XYZ;
-   return png_colorspace_check_xy(&XYZtemp, xy);
-}
-
-/* Used to check for an endpoint match against sRGB */
-static const png_xy sRGB_xy = /* From ITU-R BT.709-3 */
-{
-   /* color      x       y */
-   /* red   */ 64000, 33000,
-   /* green */ 30000, 60000,
-   /* blue  */ 15000,  6000,
-   /* white */ 31270, 32900
-};
-
-static int
-png_colorspace_set_xy_and_XYZ(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, const png_xy *xy, const png_XYZ *XYZ,
-    int preferred)
-{
-   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
-      return 0;
-
-   /* The consistency check is performed on the chromaticities; this factors out
-    * variations because of the normalization (or not) of the end point Y
-    * values.
-    */
-   if (preferred < 2 &&
-       (colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      /* The end points must be reasonably close to any we already have.  The
-       * following allows an error of up to +/-.001
-       */
-      if (png_colorspace_endpoints_match(xy, &colorspace->end_points_xy,
-          100) == 0)
-      {
-         colorspace->flags |= PNG_COLORSPACE_INVALID;
-         png_benign_error(png_ptr, "inconsistent chromaticities");
-         return 0; /* failed */
-      }
-
-      /* Only overwrite with preferred values */
-      if (preferred == 0)
-         return 1; /* ok, but no change */
-   }
-
-   colorspace->end_points_xy = *xy;
-   colorspace->end_points_XYZ = *XYZ;
-   colorspace->flags |= PNG_COLORSPACE_HAVE_ENDPOINTS;
-
-   /* The end points are normally quoted to two decimal digits, so allow +/-0.01
-    * on this test.
-    */
-   if (png_colorspace_endpoints_match(xy, &sRGB_xy, 1000) != 0)
-      colorspace->flags |= PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB;
-
-   else
-      colorspace->flags &= PNG_COLORSPACE_CANCEL(
-         PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB);
-
-   return 2; /* ok and changed */
-}
-
-int /* PRIVATE */
-png_colorspace_set_chromaticities(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, const png_xy *xy, int preferred)
-{
-   /* We must check the end points to ensure they are reasonable - in the past
-    * color management systems have crashed as a result of getting bogus
-    * colorant values, while this isn't the fault of libpng it is the
-    * responsibility of libpng because PNG carries the bomb and libpng is in a
-    * position to protect against it.
-    */
-   png_XYZ XYZ;
-
-   switch (png_colorspace_check_xy(&XYZ, xy))
-   {
-      case 0: /* success */
-         return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, xy, &XYZ,
-             preferred);
-
-      case 1:
-         /* We can't invert the chromaticities so we can't produce value XYZ
-          * values.  Likely as not a color management system will fail too.
-          */
-         colorspace->flags |= PNG_COLORSPACE_INVALID;
-         png_benign_error(png_ptr, "invalid chromaticities");
-         break;
-
-      default:
-         /* libpng is broken; this should be a warning but if it happens we
-          * want error reports so for the moment it is an error.
-          */
-         colorspace->flags |= PNG_COLORSPACE_INVALID;
-         png_error(png_ptr, "internal error checking chromaticities");
-   }
-
-   return 0; /* failed */
-}
-
-int /* PRIVATE */
-png_colorspace_set_endpoints(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, const png_XYZ *XYZ_in, int preferred)
-{
-   png_XYZ XYZ = *XYZ_in;
-   png_xy xy;
-
-   switch (png_colorspace_check_XYZ(&xy, &XYZ))
-   {
-      case 0:
-         return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, &xy, &XYZ,
-             preferred);
-
-      case 1:
-         /* End points are invalid. */
-         colorspace->flags |= PNG_COLORSPACE_INVALID;
-         png_benign_error(png_ptr, "invalid end points");
-         break;
-
-      default:
-         colorspace->flags |= PNG_COLORSPACE_INVALID;
-         png_error(png_ptr, "internal error checking chromaticities");
-   }
-
-   return 0; /* failed */
-}
-
-#if defined(PNG_sRGB_SUPPORTED) || defined(PNG_iCCP_SUPPORTED)
-/* Error message generation */
-static char
-png_icc_tag_char(png_uint_32 byte)
-{
-   byte &= 0xff;
-   if (byte >= 32 && byte <= 126)
-      return (char)byte;
-   else
-      return '?';
-}
-
-static void
-png_icc_tag_name(char *name, png_uint_32 tag)
-{
-   name[0] = '\'';
-   name[1] = png_icc_tag_char(tag >> 24);
-   name[2] = png_icc_tag_char(tag >> 16);
-   name[3] = png_icc_tag_char(tag >>  8);
-   name[4] = png_icc_tag_char(tag      );
-   name[5] = '\'';
-}
-
-static int
-is_ICC_signature_char(png_alloc_size_t it)
-{
-   return it == 32 || (it >= 48 && it <= 57) || (it >= 65 && it <= 90) ||
-      (it >= 97 && it <= 122);
-}
-
-static int
-is_ICC_signature(png_alloc_size_t it)
-{
-   return is_ICC_signature_char(it >> 24) /* checks all the top bits */ &&
-      is_ICC_signature_char((it >> 16) & 0xff) &&
-      is_ICC_signature_char((it >> 8) & 0xff) &&
-      is_ICC_signature_char(it & 0xff);
-}
-
-static int
-png_icc_profile_error(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_alloc_size_t value, png_const_charp reason)
-{
-   size_t pos;
-   char message[196]; /* see below for calculation */
-
-   if (colorspace != NULL)
-      colorspace->flags |= PNG_COLORSPACE_INVALID;
-
-   pos = png_safecat(message, (sizeof message), 0, "profile '"); /* 9 chars */
-   pos = png_safecat(message, pos+79, pos, name); /* Truncate to 79 chars */
-   pos = png_safecat(message, (sizeof message), pos, "': "); /* +2 = 90 */
-   if (is_ICC_signature(value) != 0)
-   {
-      /* So 'value' is at most 4 bytes and the following cast is safe */
-      png_icc_tag_name(message+pos, (png_uint_32)value);
-      pos += 6; /* total +8; less than the else clause */
-      message[pos++] = ':';
-      message[pos++] = ' ';
-   }
-#  ifdef PNG_WARNINGS_SUPPORTED
-   else
-   {
-      char number[PNG_NUMBER_BUFFER_SIZE]; /* +24 = 114 */
-
-      pos = png_safecat(message, (sizeof message), pos,
-          png_format_number(number, number+(sizeof number),
-          PNG_NUMBER_FORMAT_x, value));
-      pos = png_safecat(message, (sizeof message), pos, "h: "); /* +2 = 116 */
-   }
-#  endif
-   /* The 'reason' is an arbitrary message, allow +79 maximum 195 */
-   pos = png_safecat(message, (sizeof message), pos, reason);
-   PNG_UNUSED(pos)
-
-   /* This is recoverable, but make it unconditionally an app_error on write to
-    * avoid writing invalid ICC profiles into PNG files (i.e., we handle them
-    * on read, with a warning, but on write unless the app turns off
-    * application errors the PNG won't be written.)
-    */
-   png_chunk_report(png_ptr, message,
-       (colorspace != NULL) ? PNG_CHUNK_ERROR : PNG_CHUNK_WRITE_ERROR);
-
-   return 0;
-}
-#endif /* sRGB || iCCP */
-
-#ifdef PNG_sRGB_SUPPORTED
-int /* PRIVATE */
-png_colorspace_set_sRGB(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    int intent)
-{
-   /* sRGB sets known gamma, end points and (from the chunk) intent. */
-   /* IMPORTANT: these are not necessarily the values found in an ICC profile
-    * because ICC profiles store values adapted to a D50 environment; it is
-    * expected that the ICC profile mediaWhitePointTag will be D50; see the
-    * checks and code elsewhere to understand this better.
-    *
-    * These XYZ values, which are accurate to 5dp, produce rgb to gray
-    * coefficients of (6968,23435,2366), which are reduced (because they add up
-    * to 32769 not 32768) to (6968,23434,2366).  These are the values that
-    * libpng has traditionally used (and are the best values given the 15bit
-    * algorithm used by the rgb to gray code.)
-    */
-   static const png_XYZ sRGB_XYZ = /* D65 XYZ (*not* the D50 adapted values!) */
-   {
-      /* color      X      Y      Z */
-      /* red   */ 41239, 21264,  1933,
-      /* green */ 35758, 71517, 11919,
-      /* blue  */ 18048,  7219, 95053
-   };
-
-   /* Do nothing if the colorspace is already invalidated. */
-   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
-      return 0;
-
-   /* Check the intent, then check for existing settings.  It is valid for the
-    * PNG file to have cHRM or gAMA chunks along with sRGB, but the values must
-    * be consistent with the correct values.  If, however, this function is
-    * called below because an iCCP chunk matches sRGB then it is quite
-    * conceivable that an older app recorded incorrect gAMA and cHRM because of
-    * an incorrect calculation based on the values in the profile - this does
-    * *not* invalidate the profile (though it still produces an error, which can
-    * be ignored.)
-    */
-   if (intent < 0 || intent >= PNG_sRGB_INTENT_LAST)
-      return png_icc_profile_error(png_ptr, colorspace, "sRGB",
-          (png_alloc_size_t)intent, "invalid sRGB rendering intent");
-
-   if ((colorspace->flags & PNG_COLORSPACE_HAVE_INTENT) != 0 &&
-       colorspace->rendering_intent != intent)
-      return png_icc_profile_error(png_ptr, colorspace, "sRGB",
-         (png_alloc_size_t)intent, "inconsistent rendering intents");
-
-   if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0)
-   {
-      png_benign_error(png_ptr, "duplicate sRGB information ignored");
-      return 0;
-   }
-
-   /* If the standard sRGB cHRM chunk does not match the one from the PNG file
-    * warn but overwrite the value with the correct one.
-    */
-   if ((colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0 &&
-       !png_colorspace_endpoints_match(&sRGB_xy, &colorspace->end_points_xy,
-       100))
-      png_chunk_report(png_ptr, "cHRM chunk does not match sRGB",
-         PNG_CHUNK_ERROR);
-
-   /* This check is just done for the error reporting - the routine always
-    * returns true when the 'from' argument corresponds to sRGB (2).
-    */
-   (void)png_colorspace_check_gamma(png_ptr, colorspace, PNG_GAMMA_sRGB_INVERSE,
-       2/*from sRGB*/);
-
-   /* intent: bugs in GCC force 'int' to be used as the parameter type. */
-   colorspace->rendering_intent = (png_uint_16)intent;
-   colorspace->flags |= PNG_COLORSPACE_HAVE_INTENT;
-
-   /* endpoints */
-   colorspace->end_points_xy = sRGB_xy;
-   colorspace->end_points_XYZ = sRGB_XYZ;
-   colorspace->flags |=
-      (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB);
-
-   /* gamma */
-   colorspace->gamma = PNG_GAMMA_sRGB_INVERSE;
-   colorspace->flags |= PNG_COLORSPACE_HAVE_GAMMA;
-
-   /* Finally record that we have an sRGB profile */
-   colorspace->flags |=
-      (PNG_COLORSPACE_MATCHES_sRGB|PNG_COLORSPACE_FROM_sRGB);
-
-   return 1; /* set */
-}
-#endif /* sRGB */
-
-#ifdef PNG_iCCP_SUPPORTED
-/* Encoded value of D50 as an ICC XYZNumber.  From the ICC 2010 spec the value
- * is XYZ(0.9642,1.0,0.8249), which scales to:
- *
- *    (63189.8112, 65536, 54060.6464)
- */
-static const png_byte D50_nCIEXYZ[12] =
-   { 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d };
-
-static int /* bool */
-icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_uint_32 profile_length)
-{
-   if (profile_length < 132)
-      return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
-          "too short");
-   return 1;
-}
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-int /* PRIVATE */
-png_icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_uint_32 profile_length)
-{
-   if (!icc_check_length(png_ptr, colorspace, name, profile_length))
-      return 0;
-
-   /* This needs to be here because the 'normal' check is in
-    * png_decompress_chunk, yet this happens after the attempt to
-    * png_malloc_base the required data.  We only need this on read; on write
-    * the caller supplies the profile buffer so libpng doesn't allocate it.  See
-    * the call to icc_check_length below (the write case).
-    */
-#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
-      else if (png_ptr->user_chunk_malloc_max > 0 &&
-               png_ptr->user_chunk_malloc_max < profile_length)
-         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
-             "exceeds application limits");
-#  elif PNG_USER_CHUNK_MALLOC_MAX > 0
-      else if (PNG_USER_CHUNK_MALLOC_MAX < profile_length)
-         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
-             "exceeds libpng limits");
-#  else /* !SET_USER_LIMITS */
-      /* This will get compiled out on all 32-bit and better systems. */
-      else if (PNG_SIZE_MAX < profile_length)
-         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
-             "exceeds system limits");
-#  endif /* !SET_USER_LIMITS */
-
-   return 1;
-}
-#endif /* READ_iCCP */
-
-int /* PRIVATE */
-png_icc_check_header(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_uint_32 profile_length,
-    png_const_bytep profile/* first 132 bytes only */, int color_type)
-{
-   png_uint_32 temp;
-
-   /* Length check; this cannot be ignored in this code because profile_length
-    * is used later to check the tag table, so even if the profile seems over
-    * long profile_length from the caller must be correct.  The caller can fix
-    * this up on read or write by just passing in the profile header length.
-    */
-   temp = png_get_uint_32(profile);
-   if (temp != profile_length)
-      return png_icc_profile_error(png_ptr, colorspace, name, temp,
-          "length does not match profile");
-
-   temp = (png_uint_32) (*(profile+8));
-   if (temp > 3 && (profile_length & 3))
-      return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
-          "invalid length");
-
-   temp = png_get_uint_32(profile+128); /* tag count: 12 bytes/tag */
-   if (temp > 357913930 || /* (2^32-4-132)/12: maximum possible tag count */
-      profile_length < 132+12*temp) /* truncated tag table */
-      return png_icc_profile_error(png_ptr, colorspace, name, temp,
-          "tag count too large");
-
-   /* The 'intent' must be valid or we can't store it, ICC limits the intent to
-    * 16 bits.
-    */
-   temp = png_get_uint_32(profile+64);
-   if (temp >= 0xffff) /* The ICC limit */
-      return png_icc_profile_error(png_ptr, colorspace, name, temp,
-          "invalid rendering intent");
-
-   /* This is just a warning because the profile may be valid in future
-    * versions.
-    */
-   if (temp >= PNG_sRGB_INTENT_LAST)
-      (void)png_icc_profile_error(png_ptr, NULL, name, temp,
-          "intent outside defined range");
-
-   /* At this point the tag table can't be checked because it hasn't necessarily
-    * been loaded; however, various header fields can be checked.  These checks
-    * are for values permitted by the PNG spec in an ICC profile; the PNG spec
-    * restricts the profiles that can be passed in an iCCP chunk (they must be
-    * appropriate to processing PNG data!)
-    */
-
-   /* Data checks (could be skipped).  These checks must be independent of the
-    * version number; however, the version number doesn't accommodate changes in
-    * the header fields (just the known tags and the interpretation of the
-    * data.)
-    */
-   temp = png_get_uint_32(profile+36); /* signature 'ascp' */
-   if (temp != 0x61637370)
-      return png_icc_profile_error(png_ptr, colorspace, name, temp,
-          "invalid signature");
-
-   /* Currently the PCS illuminant/adopted white point (the computational
-    * white point) are required to be D50,
-    * however the profile contains a record of the illuminant so perhaps ICC
-    * expects to be able to change this in the future (despite the rationale in
-    * the introduction for using a fixed PCS adopted white.)  Consequently the
-    * following is just a warning.
-    */
-   if (memcmp(profile+68, D50_nCIEXYZ, 12) != 0)
-      (void)png_icc_profile_error(png_ptr, NULL, name, 0/*no tag value*/,
-          "PCS illuminant is not D50");
-
-   /* The PNG spec requires this:
-    * "If the iCCP chunk is present, the image samples conform to the colour
-    * space represented by the embedded ICC profile as defined by the
-    * International Color Consortium [ICC]. The colour space of the ICC profile
-    * shall be an RGB colour space for colour images (PNG colour types 2, 3, and
-    * 6), or a greyscale colour space for greyscale images (PNG colour types 0
-    * and 4)."
-    *
-    * This checking code ensures the embedded profile (on either read or write)
-    * conforms to the specification requirements.  Notice that an ICC 'gray'
-    * color-space profile contains the information to transform the monochrome
-    * data to XYZ or L*a*b (according to which PCS the profile uses) and this
-    * should be used in preference to the standard libpng K channel replication
-    * into R, G and B channels.
-    *
-    * Previously it was suggested that an RGB profile on grayscale data could be
-    * handled.  However it it is clear that using an RGB profile in this context
-    * must be an error - there is no specification of what it means.  Thus it is
-    * almost certainly more correct to ignore the profile.
-    */
-   temp = png_get_uint_32(profile+16); /* data colour space field */
-   switch (temp)
-   {
-      case 0x52474220: /* 'RGB ' */
-         if ((color_type & PNG_COLOR_MASK_COLOR) == 0)
-            return png_icc_profile_error(png_ptr, colorspace, name, temp,
-                "RGB color space not permitted on grayscale PNG");
-         break;
-
-      case 0x47524159: /* 'GRAY' */
-         if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
-            return png_icc_profile_error(png_ptr, colorspace, name, temp,
-                "Gray color space not permitted on RGB PNG");
-         break;
-
-      default:
-         return png_icc_profile_error(png_ptr, colorspace, name, temp,
-             "invalid ICC profile color space");
-   }
-
-   /* It is up to the application to check that the profile class matches the
-    * application requirements; the spec provides no guidance, but it's pretty
-    * weird if the profile is not scanner ('scnr'), monitor ('mntr'), printer
-    * ('prtr') or 'spac' (for generic color spaces).  Issue a warning in these
-    * cases.  Issue an error for device link or abstract profiles - these don't
-    * contain the records necessary to transform the color-space to anything
-    * other than the target device (and not even that for an abstract profile).
-    * Profiles of these classes may not be embedded in images.
-    */
-   temp = png_get_uint_32(profile+12); /* profile/device class */
-   switch (temp)
-   {
-      case 0x73636e72: /* 'scnr' */
-      case 0x6d6e7472: /* 'mntr' */
-      case 0x70727472: /* 'prtr' */
-      case 0x73706163: /* 'spac' */
-         /* All supported */
-         break;
-
-      case 0x61627374: /* 'abst' */
-         /* May not be embedded in an image */
-         return png_icc_profile_error(png_ptr, colorspace, name, temp,
-             "invalid embedded Abstract ICC profile");
-
-      case 0x6c696e6b: /* 'link' */
-         /* DeviceLink profiles cannot be interpreted in a non-device specific
-          * fashion, if an app uses the AToB0Tag in the profile the results are
-          * undefined unless the result is sent to the intended device,
-          * therefore a DeviceLink profile should not be found embedded in a
-          * PNG.
-          */
-         return png_icc_profile_error(png_ptr, colorspace, name, temp,
-             "unexpected DeviceLink ICC profile class");
-
-      case 0x6e6d636c: /* 'nmcl' */
-         /* A NamedColor profile is also device specific, however it doesn't
-          * contain an AToB0 tag that is open to misinterpretation.  Almost
-          * certainly it will fail the tests below.
-          */
-         (void)png_icc_profile_error(png_ptr, NULL, name, temp,
-             "unexpected NamedColor ICC profile class");
-         break;
-
-      default:
-         /* To allow for future enhancements to the profile accept unrecognized
-          * profile classes with a warning, these then hit the test below on the
-          * tag content to ensure they are backward compatible with one of the
-          * understood profiles.
-          */
-         (void)png_icc_profile_error(png_ptr, NULL, name, temp,
-             "unrecognized ICC profile class");
-         break;
-   }
-
-   /* For any profile other than a device link one the PCS must be encoded
-    * either in XYZ or Lab.
-    */
-   temp = png_get_uint_32(profile+20);
-   switch (temp)
-   {
-      case 0x58595a20: /* 'XYZ ' */
-      case 0x4c616220: /* 'Lab ' */
-         break;
-
-      default:
-         return png_icc_profile_error(png_ptr, colorspace, name, temp,
-             "unexpected ICC PCS encoding");
-   }
-
-   return 1;
-}
-
-int /* PRIVATE */
-png_icc_check_tag_table(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_uint_32 profile_length,
-    png_const_bytep profile /* header plus whole tag table */)
-{
-   png_uint_32 tag_count = png_get_uint_32(profile+128);
-   png_uint_32 itag;
-   png_const_bytep tag = profile+132; /* The first tag */
-
-   /* First scan all the tags in the table and add bits to the icc_info value
-    * (temporarily in 'tags').
-    */
-   for (itag=0; itag < tag_count; ++itag, tag += 12)
-   {
-      png_uint_32 tag_id = png_get_uint_32(tag+0);
-      png_uint_32 tag_start = png_get_uint_32(tag+4); /* must be aligned */
-      png_uint_32 tag_length = png_get_uint_32(tag+8);/* not padded */
-
-      /* The ICC specification does not exclude zero length tags, therefore the
-       * start might actually be anywhere if there is no data, but this would be
-       * a clear abuse of the intent of the standard so the start is checked for
-       * being in range.  All defined tag types have an 8 byte header - a 4 byte
-       * type signature then 0.
-       */
-
-      /* This is a hard error; potentially it can cause read outside the
-       * profile.
-       */
-      if (tag_start > profile_length || tag_length > profile_length - tag_start)
-         return png_icc_profile_error(png_ptr, colorspace, name, tag_id,
-             "ICC profile tag outside profile");
-
-      if ((tag_start & 3) != 0)
-      {
-         /* CNHP730S.icc shipped with Microsoft Windows 64 violates this; it is
-          * only a warning here because libpng does not care about the
-          * alignment.
-          */
-         (void)png_icc_profile_error(png_ptr, NULL, name, tag_id,
-             "ICC profile tag start not a multiple of 4");
-      }
-   }
-
-   return 1; /* success, maybe with warnings */
-}
-
-#ifdef PNG_sRGB_SUPPORTED
-#if PNG_sRGB_PROFILE_CHECKS >= 0
-/* Information about the known ICC sRGB profiles */
-static const struct
-{
-   png_uint_32 adler, crc, length;
-   png_uint_32 md5[4];
-   png_byte    have_md5;
-   png_byte    is_broken;
-   png_uint_16 intent;
-
-#  define PNG_MD5(a,b,c,d) { a, b, c, d }, (a!=0)||(b!=0)||(c!=0)||(d!=0)
-#  define PNG_ICC_CHECKSUM(adler, crc, md5, intent, broke, date, length, fname)\
-      { adler, crc, length, md5, broke, intent },
-
-} png_sRGB_checks[] =
-{
-   /* This data comes from contrib/tools/checksum-icc run on downloads of
-    * all four ICC sRGB profiles from www.color.org.
-    */
-   /* adler32, crc32, MD5[4], intent, date, length, file-name */
-   PNG_ICC_CHECKSUM(0x0a3fd9f6, 0x3b8772b9,
-       PNG_MD5(0x29f83dde, 0xaff255ae, 0x7842fae4, 0xca83390d), 0, 0,
-       "2009/03/27 21:36:31", 3048, "sRGB_IEC61966-2-1_black_scaled.icc")
-
-   /* ICC sRGB v2 perceptual no black-compensation: */
-   PNG_ICC_CHECKSUM(0x4909e5e1, 0x427ebb21,
-       PNG_MD5(0xc95bd637, 0xe95d8a3b, 0x0df38f99, 0xc1320389), 1, 0,
-       "2009/03/27 21:37:45", 3052, "sRGB_IEC61966-2-1_no_black_scaling.icc")
-
-   PNG_ICC_CHECKSUM(0xfd2144a1, 0x306fd8ae,
-       PNG_MD5(0xfc663378, 0x37e2886b, 0xfd72e983, 0x8228f1b8), 0, 0,
-       "2009/08/10 17:28:01", 60988, "sRGB_v4_ICC_preference_displayclass.icc")
-
-   /* ICC sRGB v4 perceptual */
-   PNG_ICC_CHECKSUM(0x209c35d2, 0xbbef7812,
-       PNG_MD5(0x34562abf, 0x994ccd06, 0x6d2c5721, 0xd0d68c5d), 0, 0,
-       "2007/07/25 00:05:37", 60960, "sRGB_v4_ICC_preference.icc")
-
-   /* The following profiles have no known MD5 checksum. If there is a match
-    * on the (empty) MD5 the other fields are used to attempt a match and
-    * a warning is produced.  The first two of these profiles have a 'cprt' tag
-    * which suggests that they were also made by Hewlett Packard.
-    */
-   PNG_ICC_CHECKSUM(0xa054d762, 0x5d5129ce,
-       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 0,
-       "2004/07/21 18:57:42", 3024, "sRGB_IEC61966-2-1_noBPC.icc")
-
-   /* This is a 'mntr' (display) profile with a mediaWhitePointTag that does not
-    * match the D50 PCS illuminant in the header (it is in fact the D65 values,
-    * so the white point is recorded as the un-adapted value.)  The profiles
-    * below only differ in one byte - the intent - and are basically the same as
-    * the previous profile except for the mediaWhitePointTag error and a missing
-    * chromaticAdaptationTag.
-    */
-   PNG_ICC_CHECKSUM(0xf784f3fb, 0x182ea552,
-       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 1/*broken*/,
-       "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 perceptual")
-
-   PNG_ICC_CHECKSUM(0x0398f3fc, 0xf29e526d,
-       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 1/*broken*/,
-       "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 media-relative")
-};
-
-static int
-png_compare_ICC_profile_with_sRGB(png_const_structrp png_ptr,
-    png_const_bytep profile, uLong adler)
-{
-   /* The quick check is to verify just the MD5 signature and trust the
-    * rest of the data.  Because the profile has already been verified for
-    * correctness this is safe.  png_colorspace_set_sRGB will check the 'intent'
-    * field too, so if the profile has been edited with an intent not defined
-    * by sRGB (but maybe defined by a later ICC specification) the read of
-    * the profile will fail at that point.
-    */
-
-   png_uint_32 length = 0;
-   png_uint_32 intent = 0x10000; /* invalid */
-#if PNG_sRGB_PROFILE_CHECKS > 1
-   uLong crc = 0; /* the value for 0 length data */
-#endif
-   unsigned int i;
-
-#ifdef PNG_SET_OPTION_SUPPORTED
-   /* First see if PNG_SKIP_sRGB_CHECK_PROFILE has been set to "on" */
-   if (((png_ptr->options >> PNG_SKIP_sRGB_CHECK_PROFILE) & 3) ==
-               PNG_OPTION_ON)
-      return 0;
-#endif
-
-   for (i=0; i < (sizeof png_sRGB_checks) / (sizeof png_sRGB_checks[0]); ++i)
-   {
-      if (png_get_uint_32(profile+84) == png_sRGB_checks[i].md5[0] &&
-         png_get_uint_32(profile+88) == png_sRGB_checks[i].md5[1] &&
-         png_get_uint_32(profile+92) == png_sRGB_checks[i].md5[2] &&
-         png_get_uint_32(profile+96) == png_sRGB_checks[i].md5[3])
-      {
-         /* This may be one of the old HP profiles without an MD5, in that
-          * case we can only use the length and Adler32 (note that these
-          * are not used by default if there is an MD5!)
-          */
-#        if PNG_sRGB_PROFILE_CHECKS == 0
-            if (png_sRGB_checks[i].have_md5 != 0)
-               return 1+png_sRGB_checks[i].is_broken;
-#        endif
-
-         /* Profile is unsigned or more checks have been configured in. */
-         if (length == 0)
-         {
-            length = png_get_uint_32(profile);
-            intent = png_get_uint_32(profile+64);
-         }
-
-         /* Length *and* intent must match */
-         if (length == (png_uint_32) png_sRGB_checks[i].length &&
-            intent == (png_uint_32) png_sRGB_checks[i].intent)
-         {
-            /* Now calculate the adler32 if not done already. */
-            if (adler == 0)
-            {
-               adler = adler32(0, NULL, 0);
-               adler = adler32(adler, profile, length);
-            }
-
-            if (adler == png_sRGB_checks[i].adler)
-            {
-               /* These basic checks suggest that the data has not been
-                * modified, but if the check level is more than 1 perform
-                * our own crc32 checksum on the data.
-                */
-#              if PNG_sRGB_PROFILE_CHECKS > 1
-                  if (crc == 0)
-                  {
-                     crc = crc32(0, NULL, 0);
-                     crc = crc32(crc, profile, length);
-                  }
-
-                  /* So this check must pass for the 'return' below to happen.
-                   */
-                  if (crc == png_sRGB_checks[i].crc)
-#              endif
-               {
-                  if (png_sRGB_checks[i].is_broken != 0)
-                  {
-                     /* These profiles are known to have bad data that may cause
-                      * problems if they are used, therefore attempt to
-                      * discourage their use, skip the 'have_md5' warning below,
-                      * which is made irrelevant by this error.
-                      */
-                     png_chunk_report(png_ptr, "known incorrect sRGB profile",
-                         PNG_CHUNK_ERROR);
-                  }
-
-                  /* Warn that this being done; this isn't even an error since
-                   * the profile is perfectly valid, but it would be nice if
-                   * people used the up-to-date ones.
-                   */
-                  else if (png_sRGB_checks[i].have_md5 == 0)
-                  {
-                     png_chunk_report(png_ptr,
-                         "out-of-date sRGB profile with no signature",
-                         PNG_CHUNK_WARNING);
-                  }
-
-                  return 1+png_sRGB_checks[i].is_broken;
-               }
-            }
-
-# if PNG_sRGB_PROFILE_CHECKS > 0
-         /* The signature matched, but the profile had been changed in some
-          * way.  This probably indicates a data error or uninformed hacking.
-          * Fall through to "no match".
-          */
-         png_chunk_report(png_ptr,
-             "Not recognizing known sRGB profile that has been edited",
-             PNG_CHUNK_WARNING);
-         break;
-# endif
-         }
-      }
-   }
-
-   return 0; /* no match */
-}
-
-void /* PRIVATE */
-png_icc_set_sRGB(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, png_const_bytep profile, uLong adler)
-{
-   /* Is this profile one of the known ICC sRGB profiles?  If it is, just set
-    * the sRGB information.
-    */
-   if (png_compare_ICC_profile_with_sRGB(png_ptr, profile, adler) != 0)
-      (void)png_colorspace_set_sRGB(png_ptr, colorspace,
-         (int)/*already checked*/png_get_uint_32(profile+64));
-}
-#endif /* PNG_sRGB_PROFILE_CHECKS >= 0 */
-#endif /* sRGB */
-
-int /* PRIVATE */
-png_colorspace_set_ICC(png_const_structrp png_ptr, png_colorspacerp colorspace,
-    png_const_charp name, png_uint_32 profile_length, png_const_bytep profile,
-    int color_type)
-{
-   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
-      return 0;
-
-   if (icc_check_length(png_ptr, colorspace, name, profile_length) != 0 &&
-       png_icc_check_header(png_ptr, colorspace, name, profile_length, profile,
-           color_type) != 0 &&
-       png_icc_check_tag_table(png_ptr, colorspace, name, profile_length,
-           profile) != 0)
-   {
-#     if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0
-         /* If no sRGB support, don't try storing sRGB information */
-         png_icc_set_sRGB(png_ptr, colorspace, profile, 0);
-#     endif
-      return 1;
-   }
-
-   /* Failure case */
-   return 0;
-}
-#endif /* iCCP */
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-void /* PRIVATE */
-png_colorspace_set_rgb_coefficients(png_structrp png_ptr)
-{
-   /* Set the rgb_to_gray coefficients from the colorspace. */
-   if (png_ptr->rgb_to_gray_coefficients_set == 0 &&
-      (png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      /* png_set_background has not been called, get the coefficients from the Y
-       * values of the colorspace colorants.
-       */
-      png_fixed_point r = png_ptr->colorspace.end_points_XYZ.red_Y;
-      png_fixed_point g = png_ptr->colorspace.end_points_XYZ.green_Y;
-      png_fixed_point b = png_ptr->colorspace.end_points_XYZ.blue_Y;
-      png_fixed_point total = r+g+b;
-
-      if (total > 0 &&
-         r >= 0 && png_muldiv(&r, r, 32768, total) && r >= 0 && r <= 32768 &&
-         g >= 0 && png_muldiv(&g, g, 32768, total) && g >= 0 && g <= 32768 &&
-         b >= 0 && png_muldiv(&b, b, 32768, total) && b >= 0 && b <= 32768 &&
-         r+g+b <= 32769)
-      {
-         /* We allow 0 coefficients here.  r+g+b may be 32769 if two or
-          * all of the coefficients were rounded up.  Handle this by
-          * reducing the *largest* coefficient by 1; this matches the
-          * approach used for the default coefficients in pngrtran.c
-          */
-         int add = 0;
-
-         if (r+g+b > 32768)
-            add = -1;
-         else if (r+g+b < 32768)
-            add = 1;
-
-         if (add != 0)
-         {
-            if (g >= r && g >= b)
-               g += add;
-            else if (r >= g && r >= b)
-               r += add;
-            else
-               b += add;
-         }
-
-         /* Check for an internal error. */
-         if (r+g+b != 32768)
-            png_error(png_ptr,
-                "internal error handling cHRM coefficients");
-
-         else
-         {
-            png_ptr->rgb_to_gray_red_coeff   = (png_uint_16)r;
-            png_ptr->rgb_to_gray_green_coeff = (png_uint_16)g;
-         }
-      }
-
-      /* This is a png_error at present even though it could be ignored -
-       * it should never happen, but it is important that if it does, the
-       * bug is fixed.
-       */
-      else
-         png_error(png_ptr, "internal error handling cHRM->XYZ");
-   }
-}
-#endif /* READ_RGB_TO_GRAY */
-
-#endif /* COLORSPACE */
-
-void /* PRIVATE */
-png_check_IHDR(png_const_structrp png_ptr,
-    png_uint_32 width, png_uint_32 height, int bit_depth,
-    int color_type, int interlace_type, int compression_type,
-    int filter_type)
-{
-   int error = 0;
-
-   /* Check for width and height valid values */
-   if (width == 0)
-   {
-      png_warning(png_ptr, "Image width is zero in IHDR");
-      error = 1;
-   }
-
-   if (width > PNG_UINT_31_MAX)
-   {
-      png_warning(png_ptr, "Invalid image width in IHDR");
-      error = 1;
-   }
-
-   /* The bit mask on the first line below must be at least as big as a
-    * png_uint_32.  "~7U" is not adequate on 16-bit systems because it will
-    * be an unsigned 16-bit value.  Casting to (png_alloc_size_t) makes the
-    * type of the result at least as bit (in bits) as the RHS of the > operator
-    * which also avoids a common warning on 64-bit systems that the comparison
-    * of (png_uint_32) against the constant value on the RHS will always be
-    * false.
-    */
-   if (((width + 7) & ~(png_alloc_size_t)7) >
-       (((PNG_SIZE_MAX
-           - 48        /* big_row_buf hack */
-           - 1)        /* filter byte */
-           / 8)        /* 8-byte RGBA pixels */
-           - 1))       /* extra max_pixel_depth pad */
-   {
-      /* The size of the row must be within the limits of this architecture.
-       * Because the read code can perform arbitrary transformations the
-       * maximum size is checked here.  Because the code in png_read_start_row
-       * adds extra space "for safety's sake" in several places a conservative
-       * limit is used here.
-       *
-       * NOTE: it would be far better to check the size that is actually used,
-       * but the effect in the real world is minor and the changes are more
-       * extensive, therefore much more dangerous and much more difficult to
-       * write in a way that avoids compiler warnings.
-       */
-      png_warning(png_ptr, "Image width is too large for this architecture");
-      error = 1;
-   }
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (width > png_ptr->user_width_max)
-#else
-   if (width > PNG_USER_WIDTH_MAX)
-#endif
-   {
-      png_warning(png_ptr, "Image width exceeds user limit in IHDR");
-      error = 1;
-   }
-
-   if (height == 0)
-   {
-      png_warning(png_ptr, "Image height is zero in IHDR");
-      error = 1;
-   }
-
-   if (height > PNG_UINT_31_MAX)
-   {
-      png_warning(png_ptr, "Invalid image height in IHDR");
-      error = 1;
-   }
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (height > png_ptr->user_height_max)
-#else
-   if (height > PNG_USER_HEIGHT_MAX)
-#endif
-   {
-      png_warning(png_ptr, "Image height exceeds user limit in IHDR");
-      error = 1;
-   }
-
-   /* Check other values */
-   if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 &&
-       bit_depth != 8 && bit_depth != 16)
-   {
-      png_warning(png_ptr, "Invalid bit depth in IHDR");
-      error = 1;
-   }
-
-   if (color_type < 0 || color_type == 1 ||
-       color_type == 5 || color_type > 6)
-   {
-      png_warning(png_ptr, "Invalid color type in IHDR");
-      error = 1;
-   }
-
-   if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) ||
-       ((color_type == PNG_COLOR_TYPE_RGB ||
-         color_type == PNG_COLOR_TYPE_GRAY_ALPHA ||
-         color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8))
-   {
-      png_warning(png_ptr, "Invalid color type/bit depth combination in IHDR");
-      error = 1;
-   }
-
-   if (interlace_type >= PNG_INTERLACE_LAST)
-   {
-      png_warning(png_ptr, "Unknown interlace method in IHDR");
-      error = 1;
-   }
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Unknown compression method in IHDR");
-      error = 1;
-   }
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   /* Accept filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not read a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 &&
-       png_ptr->mng_features_permitted != 0)
-      png_warning(png_ptr, "MNG features are not allowed in a PNG datastream");
-
-   if (filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      if (!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
-          (filter_type == PNG_INTRAPIXEL_DIFFERENCING) &&
-          ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) &&
-          (color_type == PNG_COLOR_TYPE_RGB ||
-          color_type == PNG_COLOR_TYPE_RGB_ALPHA)))
-      {
-         png_warning(png_ptr, "Unknown filter method in IHDR");
-         error = 1;
-      }
-
-      if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0)
-      {
-         png_warning(png_ptr, "Invalid filter method in IHDR");
-         error = 1;
-      }
-   }
-
-#else
-   if (filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Unknown filter method in IHDR");
-      error = 1;
-   }
-#endif
-
-   if (error == 1)
-      png_error(png_ptr, "Invalid IHDR data");
-}
-
-#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
-/* ASCII to fp functions */
-/* Check an ASCII formatted floating point value, see the more detailed
- * comments in pngpriv.h
- */
-/* The following is used internally to preserve the sticky flags */
-#define png_fp_add(state, flags) ((state) |= (flags))
-#define png_fp_set(state, value) ((state) = (value) | ((state) & PNG_FP_STICKY))
-
-int /* PRIVATE */
-png_check_fp_number(png_const_charp string, size_t size, int *statep,
-    size_t *whereami)
-{
-   int state = *statep;
-   size_t i = *whereami;
-
-   while (i < size)
-   {
-      int type;
-      /* First find the type of the next character */
-      switch (string[i])
-      {
-      case 43:  type = PNG_FP_SAW_SIGN;                   break;
-      case 45:  type = PNG_FP_SAW_SIGN + PNG_FP_NEGATIVE; break;
-      case 46:  type = PNG_FP_SAW_DOT;                    break;
-      case 48:  type = PNG_FP_SAW_DIGIT;                  break;
-      case 49: case 50: case 51: case 52:
-      case 53: case 54: case 55: case 56:
-      case 57:  type = PNG_FP_SAW_DIGIT + PNG_FP_NONZERO; break;
-      case 69:
-      case 101: type = PNG_FP_SAW_E;                      break;
-      default:  goto PNG_FP_End;
-      }
-
-      /* Now deal with this type according to the current
-       * state, the type is arranged to not overlap the
-       * bits of the PNG_FP_STATE.
-       */
-      switch ((state & PNG_FP_STATE) + (type & PNG_FP_SAW_ANY))
-      {
-      case PNG_FP_INTEGER + PNG_FP_SAW_SIGN:
-         if ((state & PNG_FP_SAW_ANY) != 0)
-            goto PNG_FP_End; /* not a part of the number */
-
-         png_fp_add(state, type);
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_DOT:
-         /* Ok as trailer, ok as lead of fraction. */
-         if ((state & PNG_FP_SAW_DOT) != 0) /* two dots */
-            goto PNG_FP_End;
-
-         else if ((state & PNG_FP_SAW_DIGIT) != 0) /* trailing dot? */
-            png_fp_add(state, type);
-
-         else
-            png_fp_set(state, PNG_FP_FRACTION | type);
-
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_DIGIT:
-         if ((state & PNG_FP_SAW_DOT) != 0) /* delayed fraction */
-            png_fp_set(state, PNG_FP_FRACTION | PNG_FP_SAW_DOT);
-
-         png_fp_add(state, type | PNG_FP_WAS_VALID);
-
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_E:
-         if ((state & PNG_FP_SAW_DIGIT) == 0)
-            goto PNG_FP_End;
-
-         png_fp_set(state, PNG_FP_EXPONENT);
-
-         break;
-
-   /* case PNG_FP_FRACTION + PNG_FP_SAW_SIGN:
-         goto PNG_FP_End; ** no sign in fraction */
-
-   /* case PNG_FP_FRACTION + PNG_FP_SAW_DOT:
-         goto PNG_FP_End; ** Because SAW_DOT is always set */
-
-      case PNG_FP_FRACTION + PNG_FP_SAW_DIGIT:
-         png_fp_add(state, type | PNG_FP_WAS_VALID);
-         break;
-
-      case PNG_FP_FRACTION + PNG_FP_SAW_E:
-         /* This is correct because the trailing '.' on an
-          * integer is handled above - so we can only get here
-          * with the sequence ".E" (with no preceding digits).
-          */
-         if ((state & PNG_FP_SAW_DIGIT) == 0)
-            goto PNG_FP_End;
-
-         png_fp_set(state, PNG_FP_EXPONENT);
-
-         break;
-
-      case PNG_FP_EXPONENT + PNG_FP_SAW_SIGN:
-         if ((state & PNG_FP_SAW_ANY) != 0)
-            goto PNG_FP_End; /* not a part of the number */
-
-         png_fp_add(state, PNG_FP_SAW_SIGN);
-
-         break;
-
-   /* case PNG_FP_EXPONENT + PNG_FP_SAW_DOT:
-         goto PNG_FP_End; */
-
-      case PNG_FP_EXPONENT + PNG_FP_SAW_DIGIT:
-         png_fp_add(state, PNG_FP_SAW_DIGIT | PNG_FP_WAS_VALID);
-
-         break;
-
-   /* case PNG_FP_EXPONEXT + PNG_FP_SAW_E:
-         goto PNG_FP_End; */
-
-      default: goto PNG_FP_End; /* I.e. break 2 */
-      }
-
-      /* The character seems ok, continue. */
-      ++i;
-   }
-
-PNG_FP_End:
-   /* Here at the end, update the state and return the correct
-    * return code.
-    */
-   *statep = state;
-   *whereami = i;
-
-   return (state & PNG_FP_SAW_DIGIT) != 0;
-}
-
-
-/* The same but for a complete string. */
-int
-png_check_fp_string(png_const_charp string, size_t size)
-{
-   int        state=0;
-   size_t char_index=0;
-
-   if (png_check_fp_number(string, size, &state, &char_index) != 0 &&
-      (char_index == size || string[char_index] == 0))
-      return state /* must be non-zero - see above */;
-
-   return 0; /* i.e. fail */
-}
-#endif /* pCAL || sCAL */
-
-#ifdef PNG_sCAL_SUPPORTED
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-/* Utility used below - a simple accurate power of ten from an integral
- * exponent.
- */
-static double
-png_pow10(int power)
-{
-   int recip = 0;
-   double d = 1;
-
-   /* Handle negative exponent with a reciprocal at the end because
-    * 10 is exact whereas .1 is inexact in base 2
-    */
-   if (power < 0)
-   {
-      if (power < DBL_MIN_10_EXP) return 0;
-      recip = 1; power = -power;
-   }
-
-   if (power > 0)
-   {
-      /* Decompose power bitwise. */
-      double mult = 10;
-      do
-      {
-         if (power & 1) d *= mult;
-         mult *= mult;
-         power >>= 1;
-      }
-      while (power > 0);
-
-      if (recip != 0) d = 1/d;
-   }
-   /* else power is 0 and d is 1 */
-
-   return d;
-}
-
-/* Function to format a floating point value in ASCII with a given
- * precision.
- */
-void /* PRIVATE */
-png_ascii_from_fp(png_const_structrp png_ptr, png_charp ascii, size_t size,
-    double fp, unsigned int precision)
-{
-   /* We use standard functions from math.h, but not printf because
-    * that would require stdio.  The caller must supply a buffer of
-    * sufficient size or we will png_error.  The tests on size and
-    * the space in ascii[] consumed are indicated below.
-    */
-   if (precision < 1)
-      precision = DBL_DIG;
-
-   /* Enforce the limit of the implementation precision too. */
-   if (precision > DBL_DIG+1)
-      precision = DBL_DIG+1;
-
-   /* Basic sanity checks */
-   if (size >= precision+5) /* See the requirements below. */
-   {
-      if (fp < 0)
-      {
-         fp = -fp;
-         *ascii++ = 45; /* '-'  PLUS 1 TOTAL 1 */
-         --size;
-      }
-
-      if (fp >= DBL_MIN && fp <= DBL_MAX)
-      {
-         int exp_b10;   /* A base 10 exponent */
-         double base;   /* 10^exp_b10 */
-
-         /* First extract a base 10 exponent of the number,
-          * the calculation below rounds down when converting
-          * from base 2 to base 10 (multiply by log10(2) -
-          * 0.3010, but 77/256 is 0.3008, so exp_b10 needs to
-          * be increased.  Note that the arithmetic shift
-          * performs a floor() unlike C arithmetic - using a
-          * C multiply would break the following for negative
-          * exponents.
-          */
-         (void)frexp(fp, &exp_b10); /* exponent to base 2 */
-
-         exp_b10 = (exp_b10 * 77) >> 8; /* <= exponent to base 10 */
-
-         /* Avoid underflow here. */
-         base = png_pow10(exp_b10); /* May underflow */
-
-         while (base < DBL_MIN || base < fp)
-         {
-            /* And this may overflow. */
-            double test = png_pow10(exp_b10+1);
-
-            if (test <= DBL_MAX)
-            {
-               ++exp_b10; base = test;
-            }
-
-            else
-               break;
-         }
-
-         /* Normalize fp and correct exp_b10, after this fp is in the
-          * range [.1,1) and exp_b10 is both the exponent and the digit
-          * *before* which the decimal point should be inserted
-          * (starting with 0 for the first digit).  Note that this
-          * works even if 10^exp_b10 is out of range because of the
-          * test on DBL_MAX above.
-          */
-         fp /= base;
-         while (fp >= 1)
-         {
-            fp /= 10; ++exp_b10;
-         }
-
-         /* Because of the code above fp may, at this point, be
-          * less than .1, this is ok because the code below can
-          * handle the leading zeros this generates, so no attempt
-          * is made to correct that here.
-          */
-
-         {
-            unsigned int czero, clead, cdigits;
-            char exponent[10];
-
-            /* Allow up to two leading zeros - this will not lengthen
-             * the number compared to using E-n.
-             */
-            if (exp_b10 < 0 && exp_b10 > -3) /* PLUS 3 TOTAL 4 */
-            {
-               czero = 0U-exp_b10; /* PLUS 2 digits: TOTAL 3 */
-               exp_b10 = 0;      /* Dot added below before first output. */
-            }
-            else
-               czero = 0;    /* No zeros to add */
-
-            /* Generate the digit list, stripping trailing zeros and
-             * inserting a '.' before a digit if the exponent is 0.
-             */
-            clead = czero; /* Count of leading zeros */
-            cdigits = 0;   /* Count of digits in list. */
-
-            do
-            {
-               double d;
-
-               fp *= 10;
-               /* Use modf here, not floor and subtract, so that
-                * the separation is done in one step.  At the end
-                * of the loop don't break the number into parts so
-                * that the final digit is rounded.
-                */
-               if (cdigits+czero+1 < precision+clead)
-                  fp = modf(fp, &d);
-
-               else
-               {
-                  d = floor(fp + .5);
-
-                  if (d > 9)
-                  {
-                     /* Rounding up to 10, handle that here. */
-                     if (czero > 0)
-                     {
-                        --czero; d = 1;
-                        if (cdigits == 0) --clead;
-                     }
-                     else
-                     {
-                        while (cdigits > 0 && d > 9)
-                        {
-                           int ch = *--ascii;
-
-                           if (exp_b10 != (-1))
-                              ++exp_b10;
-
-                           else if (ch == 46)
-                           {
-                              ch = *--ascii; ++size;
-                              /* Advance exp_b10 to '1', so that the
-                               * decimal point happens after the
-                               * previous digit.
-                               */
-                              exp_b10 = 1;
-                           }
-
-                           --cdigits;
-                           d = ch - 47;  /* I.e. 1+(ch-48) */
-                        }
-
-                        /* Did we reach the beginning? If so adjust the
-                         * exponent but take into account the leading
-                         * decimal point.
-                         */
-                        if (d > 9)  /* cdigits == 0 */
-                        {
-                           if (exp_b10 == (-1))
-                           {
-                              /* Leading decimal point (plus zeros?), if
-                               * we lose the decimal point here it must
-                               * be reentered below.
-                               */
-                              int ch = *--ascii;
-
-                              if (ch == 46)
-                              {
-                                 ++size; exp_b10 = 1;
-                              }
-
-                              /* Else lost a leading zero, so 'exp_b10' is
-                               * still ok at (-1)
-                               */
-                           }
-                           else
-                              ++exp_b10;
-
-                           /* In all cases we output a '1' */
-                           d = 1;
-                        }
-                     }
-                  }
-                  fp = 0; /* Guarantees termination below. */
-               }
-
-               if (d == 0)
-               {
-                  ++czero;
-                  if (cdigits == 0) ++clead;
-               }
-               else
-               {
-                  /* Included embedded zeros in the digit count. */
-                  cdigits += czero - clead;
-                  clead = 0;
-
-                  while (czero > 0)
-                  {
-                     /* exp_b10 == (-1) means we just output the decimal
-                      * place - after the DP don't adjust 'exp_b10' any
-                      * more!
-                      */
-                     if (exp_b10 != (-1))
-                     {
-                        if (exp_b10 == 0)
-                        {
-                           *ascii++ = 46; --size;
-                        }
-                        /* PLUS 1: TOTAL 4 */
-                        --exp_b10;
-                     }
-                     *ascii++ = 48; --czero;
-                  }
-
-                  if (exp_b10 != (-1))
-                  {
-                     if (exp_b10 == 0)
-                     {
-                        *ascii++ = 46; --size; /* counted above */
-                     }
-
-                     --exp_b10;
-                  }
-                  *ascii++ = (char)(48 + (int)d); ++cdigits;
-               }
-            }
-            while (cdigits+czero < precision+clead && fp > DBL_MIN);
-
-            /* The total output count (max) is now 4+precision */
-
-            /* Check for an exponent, if we don't need one we are
-             * done and just need to terminate the string.  At this
-             * point, exp_b10==(-1) is effectively a flag: it got
-             * to '-1' because of the decrement, after outputting
-             * the decimal point above. (The exponent required is
-             * *not* -1.)
-             */
-            if (exp_b10 >= (-1) && exp_b10 <= 2)
-            {
-               /* The following only happens if we didn't output the
-                * leading zeros above for negative exponent, so this
-                * doesn't add to the digit requirement.  Note that the
-                * two zeros here can only be output if the two leading
-                * zeros were *not* output, so this doesn't increase
-                * the output count.
-                */
-               while (exp_b10-- > 0) *ascii++ = 48;
-
-               *ascii = 0;
-
-               /* Total buffer requirement (including the '\0') is
-                * 5+precision - see check at the start.
-                */
-               return;
-            }
-
-            /* Here if an exponent is required, adjust size for
-             * the digits we output but did not count.  The total
-             * digit output here so far is at most 1+precision - no
-             * decimal point and no leading or trailing zeros have
-             * been output.
-             */
-            size -= cdigits;
-
-            *ascii++ = 69; --size;    /* 'E': PLUS 1 TOTAL 2+precision */
-
-            /* The following use of an unsigned temporary avoids ambiguities in
-             * the signed arithmetic on exp_b10 and permits GCC at least to do
-             * better optimization.
-             */
-            {
-               unsigned int uexp_b10;
-
-               if (exp_b10 < 0)
-               {
-                  *ascii++ = 45; --size; /* '-': PLUS 1 TOTAL 3+precision */
-                  uexp_b10 = 0U-exp_b10;
-               }
-
-               else
-                  uexp_b10 = 0U+exp_b10;
-
-               cdigits = 0;
-
-               while (uexp_b10 > 0)
-               {
-                  exponent[cdigits++] = (char)(48 + uexp_b10 % 10);
-                  uexp_b10 /= 10;
-               }
-            }
-
-            /* Need another size check here for the exponent digits, so
-             * this need not be considered above.
-             */
-            if (size > cdigits)
-            {
-               while (cdigits > 0) *ascii++ = exponent[--cdigits];
-
-               *ascii = 0;
-
-               return;
-            }
-         }
-      }
-      else if (!(fp >= DBL_MIN))
-      {
-         *ascii++ = 48; /* '0' */
-         *ascii = 0;
-         return;
-      }
-      else
-      {
-         *ascii++ = 105; /* 'i' */
-         *ascii++ = 110; /* 'n' */
-         *ascii++ = 102; /* 'f' */
-         *ascii = 0;
-         return;
-      }
-   }
-
-   /* Here on buffer too small. */
-   png_error(png_ptr, "ASCII conversion buffer too small");
-}
-#  endif /* FLOATING_POINT */
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-/* Function to format a fixed point value in ASCII.
- */
-void /* PRIVATE */
-png_ascii_from_fixed(png_const_structrp png_ptr, png_charp ascii,
-    size_t size, png_fixed_point fp)
-{
-   /* Require space for 10 decimal digits, a decimal point, a minus sign and a
-    * trailing \0, 13 characters:
-    */
-   if (size > 12)
-   {
-      png_uint_32 num;
-
-      /* Avoid overflow here on the minimum integer. */
-      if (fp < 0)
-      {
-         *ascii++ = 45; num = (png_uint_32)(-fp);
-      }
-      else
-         num = (png_uint_32)fp;
-
-      if (num <= 0x80000000) /* else overflowed */
-      {
-         unsigned int ndigits = 0, first = 16 /* flag value */;
-         char digits[10] = {0};
-
-         while (num)
-         {
-            /* Split the low digit off num: */
-            unsigned int tmp = num/10;
-            num -= tmp*10;
-            digits[ndigits++] = (char)(48 + num);
-            /* Record the first non-zero digit, note that this is a number
-             * starting at 1, it's not actually the array index.
-             */
-            if (first == 16 && num > 0)
-               first = ndigits;
-            num = tmp;
-         }
-
-         if (ndigits > 0)
-         {
-            while (ndigits > 5) *ascii++ = digits[--ndigits];
-            /* The remaining digits are fractional digits, ndigits is '5' or
-             * smaller at this point.  It is certainly not zero.  Check for a
-             * non-zero fractional digit:
-             */
-            if (first <= 5)
-            {
-               unsigned int i;
-               *ascii++ = 46; /* decimal point */
-               /* ndigits may be <5 for small numbers, output leading zeros
-                * then ndigits digits to first:
-                */
-               i = 5;
-               while (ndigits < i)
-               {
-                  *ascii++ = 48; --i;
-               }
-               while (ndigits >= first) *ascii++ = digits[--ndigits];
-               /* Don't output the trailing zeros! */
-            }
-         }
-         else
-            *ascii++ = 48;
-
-         /* And null terminate the string: */
-         *ascii = 0;
-         return;
-      }
-   }
-
-   /* Here on buffer too small. */
-   png_error(png_ptr, "ASCII conversion buffer too small");
-}
-#   endif /* FIXED_POINT */
-#endif /* SCAL */
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
-   !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \
-   (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \
-   defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \
-   (defined(PNG_sCAL_SUPPORTED) && \
-   defined(PNG_FLOATING_ARITHMETIC_SUPPORTED))
-png_fixed_point
-png_fixed(png_const_structrp png_ptr, double fp, png_const_charp text)
-{
-   double r = floor(100000 * fp + .5);
-
-   if (r > 2147483647. || r < -2147483648.)
-      png_fixed_error(png_ptr, text);
-
-#  ifndef PNG_ERROR_TEXT_SUPPORTED
-   PNG_UNUSED(text)
-#  endif
-
-   return (png_fixed_point)r;
-}
-#endif
-
-#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_COLORSPACE_SUPPORTED) ||\
-    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED)
-/* muldiv functions */
-/* This API takes signed arguments and rounds the result to the nearest
- * integer (or, for a fixed point number - the standard argument - to
- * the nearest .00001).  Overflow and divide by zero are signalled in
- * the result, a boolean - true on success, false on overflow.
- */
-int
-png_muldiv(png_fixed_point_p res, png_fixed_point a, png_int_32 times,
-    png_int_32 divisor)
-{
-   /* Return a * times / divisor, rounded. */
-   if (divisor != 0)
-   {
-      if (a == 0 || times == 0)
-      {
-         *res = 0;
-         return 1;
-      }
-      else
-      {
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-         double r = a;
-         r *= times;
-         r /= divisor;
-         r = floor(r+.5);
-
-         /* A png_fixed_point is a 32-bit integer. */
-         if (r <= 2147483647. && r >= -2147483648.)
-         {
-            *res = (png_fixed_point)r;
-            return 1;
-         }
-#else
-         int negative = 0;
-         png_uint_32 A, T, D;
-         png_uint_32 s16, s32, s00;
-
-         if (a < 0)
-            negative = 1, A = -a;
-         else
-            A = a;
-
-         if (times < 0)
-            negative = !negative, T = -times;
-         else
-            T = times;
-
-         if (divisor < 0)
-            negative = !negative, D = -divisor;
-         else
-            D = divisor;
-
-         /* Following can't overflow because the arguments only
-          * have 31 bits each, however the result may be 32 bits.
-          */
-         s16 = (A >> 16) * (T & 0xffff) +
-                           (A & 0xffff) * (T >> 16);
-         /* Can't overflow because the a*times bit is only 30
-          * bits at most.
-          */
-         s32 = (A >> 16) * (T >> 16) + (s16 >> 16);
-         s00 = (A & 0xffff) * (T & 0xffff);
-
-         s16 = (s16 & 0xffff) << 16;
-         s00 += s16;
-
-         if (s00 < s16)
-            ++s32; /* carry */
-
-         if (s32 < D) /* else overflow */
-         {
-            /* s32.s00 is now the 64-bit product, do a standard
-             * division, we know that s32 < D, so the maximum
-             * required shift is 31.
-             */
-            int bitshift = 32;
-            png_fixed_point result = 0; /* NOTE: signed */
-
-            while (--bitshift >= 0)
-            {
-               png_uint_32 d32, d00;
-
-               if (bitshift > 0)
-                  d32 = D >> (32-bitshift), d00 = D << bitshift;
-
-               else
-                  d32 = 0, d00 = D;
-
-               if (s32 > d32)
-               {
-                  if (s00 < d00) --s32; /* carry */
-                  s32 -= d32, s00 -= d00, result += 1<<bitshift;
-               }
-
-               else
-                  if (s32 == d32 && s00 >= d00)
-                     s32 = 0, s00 -= d00, result += 1<<bitshift;
-            }
-
-            /* Handle the rounding. */
-            if (s00 >= (D >> 1))
-               ++result;
-
-            if (negative != 0)
-               result = -result;
-
-            /* Check for overflow. */
-            if ((negative != 0 && result <= 0) ||
-                (negative == 0 && result >= 0))
-            {
-               *res = result;
-               return 1;
-            }
-         }
-#endif
-      }
-   }
-
-   return 0;
-}
-#endif /* READ_GAMMA || INCH_CONVERSIONS */
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-/* The following is for when the caller doesn't much care about the
- * result.
- */
-png_fixed_point
-png_muldiv_warn(png_const_structrp png_ptr, png_fixed_point a, png_int_32 times,
-    png_int_32 divisor)
-{
-   png_fixed_point result;
-
-   if (png_muldiv(&result, a, times, divisor) != 0)
-      return result;
-
-   png_warning(png_ptr, "fixed point overflow ignored");
-   return 0;
-}
-#endif
-
-#ifdef PNG_GAMMA_SUPPORTED /* more fixed point functions for gamma */
-/* Calculate a reciprocal, return 0 on div-by-zero or overflow. */
-png_fixed_point
-png_reciprocal(png_fixed_point a)
-{
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   double r = floor(1E10/a+.5);
-
-   if (r <= 2147483647. && r >= -2147483648.)
-      return (png_fixed_point)r;
-#else
-   png_fixed_point res;
-
-   if (png_muldiv(&res, 100000, 100000, a) != 0)
-      return res;
-#endif
-
-   return 0; /* error/overflow */
-}
-
-/* This is the shared test on whether a gamma value is 'significant' - whether
- * it is worth doing gamma correction.
- */
-int /* PRIVATE */
-png_gamma_significant(png_fixed_point gamma_val)
-{
-   return gamma_val < PNG_FP_1 - PNG_GAMMA_THRESHOLD_FIXED ||
-       gamma_val > PNG_FP_1 + PNG_GAMMA_THRESHOLD_FIXED;
-}
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-#ifdef PNG_16BIT_SUPPORTED
-/* A local convenience routine. */
-static png_fixed_point
-png_product2(png_fixed_point a, png_fixed_point b)
-{
-   /* The required result is 1/a * 1/b; the following preserves accuracy. */
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   double r = a * 1E-5;
-   r *= b;
-   r = floor(r+.5);
-
-   if (r <= 2147483647. && r >= -2147483648.)
-      return (png_fixed_point)r;
-#else
-   png_fixed_point res;
-
-   if (png_muldiv(&res, a, b, 100000) != 0)
-      return res;
-#endif
-
-   return 0; /* overflow */
-}
-#endif /* 16BIT */
-
-/* The inverse of the above. */
-png_fixed_point
-png_reciprocal2(png_fixed_point a, png_fixed_point b)
-{
-   /* The required result is 1/a * 1/b; the following preserves accuracy. */
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   if (a != 0 && b != 0)
-   {
-      double r = 1E15/a;
-      r /= b;
-      r = floor(r+.5);
-
-      if (r <= 2147483647. && r >= -2147483648.)
-         return (png_fixed_point)r;
-   }
-#else
-   /* This may overflow because the range of png_fixed_point isn't symmetric,
-    * but this API is only used for the product of file and screen gamma so it
-    * doesn't matter that the smallest number it can produce is 1/21474, not
-    * 1/100000
-    */
-   png_fixed_point res = png_product2(a, b);
-
-   if (res != 0)
-      return png_reciprocal(res);
-#endif
-
-   return 0; /* overflow */
-}
-#endif /* READ_GAMMA */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED /* gamma table code */
-#ifndef PNG_FLOATING_ARITHMETIC_SUPPORTED
-/* Fixed point gamma.
- *
- * The code to calculate the tables used below can be found in the shell script
- * contrib/tools/intgamma.sh
- *
- * To calculate gamma this code implements fast log() and exp() calls using only
- * fixed point arithmetic.  This code has sufficient precision for either 8-bit
- * or 16-bit sample values.
- *
- * The tables used here were calculated using simple 'bc' programs, but C double
- * precision floating point arithmetic would work fine.
- *
- * 8-bit log table
- *   This is a table of -log(value/255)/log(2) for 'value' in the range 128 to
- *   255, so it's the base 2 logarithm of a normalized 8-bit floating point
- *   mantissa.  The numbers are 32-bit fractions.
- */
-static const png_uint_32
-png_8bit_l2[128] =
-{
-   4270715492U, 4222494797U, 4174646467U, 4127164793U, 4080044201U, 4033279239U,
-   3986864580U, 3940795015U, 3895065449U, 3849670902U, 3804606499U, 3759867474U,
-   3715449162U, 3671346997U, 3627556511U, 3584073329U, 3540893168U, 3498011834U,
-   3455425220U, 3413129301U, 3371120137U, 3329393864U, 3287946700U, 3246774933U,
-   3205874930U, 3165243125U, 3124876025U, 3084770202U, 3044922296U, 3005329011U,
-   2965987113U, 2926893432U, 2888044853U, 2849438323U, 2811070844U, 2772939474U,
-   2735041326U, 2697373562U, 2659933400U, 2622718104U, 2585724991U, 2548951424U,
-   2512394810U, 2476052606U, 2439922311U, 2404001468U, 2368287663U, 2332778523U,
-   2297471715U, 2262364947U, 2227455964U, 2192742551U, 2158222529U, 2123893754U,
-   2089754119U, 2055801552U, 2022034013U, 1988449497U, 1955046031U, 1921821672U,
-   1888774511U, 1855902668U, 1823204291U, 1790677560U, 1758320682U, 1726131893U,
-   1694109454U, 1662251657U, 1630556815U, 1599023271U, 1567649391U, 1536433567U,
-   1505374214U, 1474469770U, 1443718700U, 1413119487U, 1382670639U, 1352370686U,
-   1322218179U, 1292211689U, 1262349810U, 1232631153U, 1203054352U, 1173618059U,
-   1144320946U, 1115161701U, 1086139034U, 1057251672U, 1028498358U, 999877854U,
-   971388940U, 943030410U, 914801076U, 886699767U, 858725327U, 830876614U,
-   803152505U, 775551890U, 748073672U, 720716771U, 693480120U, 666362667U,
-   639363374U, 612481215U, 585715177U, 559064263U, 532527486U, 506103872U,
-   479792461U, 453592303U, 427502463U, 401522014U, 375650043U, 349885648U,
-   324227938U, 298676034U, 273229066U, 247886176U, 222646516U, 197509248U,
-   172473545U, 147538590U, 122703574U, 97967701U, 73330182U, 48790236U,
-   24347096U, 0U
-
-#if 0
-   /* The following are the values for 16-bit tables - these work fine for the
-    * 8-bit conversions but produce very slightly larger errors in the 16-bit
-    * log (about 1.2 as opposed to 0.7 absolute error in the final value).  To
-    * use these all the shifts below must be adjusted appropriately.
-    */
-   65166, 64430, 63700, 62976, 62257, 61543, 60835, 60132, 59434, 58741, 58054,
-   57371, 56693, 56020, 55352, 54689, 54030, 53375, 52726, 52080, 51439, 50803,
-   50170, 49542, 48918, 48298, 47682, 47070, 46462, 45858, 45257, 44661, 44068,
-   43479, 42894, 42312, 41733, 41159, 40587, 40020, 39455, 38894, 38336, 37782,
-   37230, 36682, 36137, 35595, 35057, 34521, 33988, 33459, 32932, 32408, 31887,
-   31369, 30854, 30341, 29832, 29325, 28820, 28319, 27820, 27324, 26830, 26339,
-   25850, 25364, 24880, 24399, 23920, 23444, 22970, 22499, 22029, 21562, 21098,
-   20636, 20175, 19718, 19262, 18808, 18357, 17908, 17461, 17016, 16573, 16132,
-   15694, 15257, 14822, 14390, 13959, 13530, 13103, 12678, 12255, 11834, 11415,
-   10997, 10582, 10168, 9756, 9346, 8937, 8531, 8126, 7723, 7321, 6921, 6523,
-   6127, 5732, 5339, 4947, 4557, 4169, 3782, 3397, 3014, 2632, 2251, 1872, 1495,
-   1119, 744, 372
-#endif
-};
-
-static png_int_32
-png_log8bit(unsigned int x)
-{
-   unsigned int lg2 = 0;
-   /* Each time 'x' is multiplied by 2, 1 must be subtracted off the final log,
-    * because the log is actually negate that means adding 1.  The final
-    * returned value thus has the range 0 (for 255 input) to 7.994 (for 1
-    * input), return -1 for the overflow (log 0) case, - so the result is
-    * always at most 19 bits.
-    */
-   if ((x &= 0xff) == 0)
-      return -1;
-
-   if ((x & 0xf0) == 0)
-      lg2  = 4, x <<= 4;
-
-   if ((x & 0xc0) == 0)
-      lg2 += 2, x <<= 2;
-
-   if ((x & 0x80) == 0)
-      lg2 += 1, x <<= 1;
-
-   /* result is at most 19 bits, so this cast is safe: */
-   return (png_int_32)((lg2 << 16) + ((png_8bit_l2[x-128]+32768)>>16));
-}
-
-/* The above gives exact (to 16 binary places) log2 values for 8-bit images,
- * for 16-bit images we use the most significant 8 bits of the 16-bit value to
- * get an approximation then multiply the approximation by a correction factor
- * determined by the remaining up to 8 bits.  This requires an additional step
- * in the 16-bit case.
- *
- * We want log2(value/65535), we have log2(v'/255), where:
- *
- *    value = v' * 256 + v''
- *          = v' * f
- *
- * So f is value/v', which is equal to (256+v''/v') since v' is in the range 128
- * to 255 and v'' is in the range 0 to 255 f will be in the range 256 to less
- * than 258.  The final factor also needs to correct for the fact that our 8-bit
- * value is scaled by 255, whereas the 16-bit values must be scaled by 65535.
- *
- * This gives a final formula using a calculated value 'x' which is value/v' and
- * scaling by 65536 to match the above table:
- *
- *   log2(x/257) * 65536
- *
- * Since these numbers are so close to '1' we can use simple linear
- * interpolation between the two end values 256/257 (result -368.61) and 258/257
- * (result 367.179).  The values used below are scaled by a further 64 to give
- * 16-bit precision in the interpolation:
- *
- * Start (256): -23591
- * Zero  (257):      0
- * End   (258):  23499
- */
-#ifdef PNG_16BIT_SUPPORTED
-static png_int_32
-png_log16bit(png_uint_32 x)
-{
-   unsigned int lg2 = 0;
-
-   /* As above, but now the input has 16 bits. */
-   if ((x &= 0xffff) == 0)
-      return -1;
-
-   if ((x & 0xff00) == 0)
-      lg2  = 8, x <<= 8;
-
-   if ((x & 0xf000) == 0)
-      lg2 += 4, x <<= 4;
-
-   if ((x & 0xc000) == 0)
-      lg2 += 2, x <<= 2;
-
-   if ((x & 0x8000) == 0)
-      lg2 += 1, x <<= 1;
-
-   /* Calculate the base logarithm from the top 8 bits as a 28-bit fractional
-    * value.
-    */
-   lg2 <<= 28;
-   lg2 += (png_8bit_l2[(x>>8)-128]+8) >> 4;
-
-   /* Now we need to interpolate the factor, this requires a division by the top
-    * 8 bits.  Do this with maximum precision.
-    */
-   x = ((x << 16) + (x >> 9)) / (x >> 8);
-
-   /* Since we divided by the top 8 bits of 'x' there will be a '1' at 1<<24,
-    * the value at 1<<16 (ignoring this) will be 0 or 1; this gives us exactly
-    * 16 bits to interpolate to get the low bits of the result.  Round the
-    * answer.  Note that the end point values are scaled by 64 to retain overall
-    * precision and that 'lg2' is current scaled by an extra 12 bits, so adjust
-    * the overall scaling by 6-12.  Round at every step.
-    */
-   x -= 1U << 24;
-
-   if (x <= 65536U) /* <= '257' */
-      lg2 += ((23591U * (65536U-x)) + (1U << (16+6-12-1))) >> (16+6-12);
-
-   else
-      lg2 -= ((23499U * (x-65536U)) + (1U << (16+6-12-1))) >> (16+6-12);
-
-   /* Safe, because the result can't have more than 20 bits: */
-   return (png_int_32)((lg2 + 2048) >> 12);
-}
-#endif /* 16BIT */
-
-/* The 'exp()' case must invert the above, taking a 20-bit fixed point
- * logarithmic value and returning a 16 or 8-bit number as appropriate.  In
- * each case only the low 16 bits are relevant - the fraction - since the
- * integer bits (the top 4) simply determine a shift.
- *
- * The worst case is the 16-bit distinction between 65535 and 65534. This
- * requires perhaps spurious accuracy in the decoding of the logarithm to
- * distinguish log2(65535/65534.5) - 10^-5 or 17 bits.  There is little chance
- * of getting this accuracy in practice.
- *
- * To deal with this the following exp() function works out the exponent of the
- * fractional part of the logarithm by using an accurate 32-bit value from the
- * top four fractional bits then multiplying in the remaining bits.
- */
-static const png_uint_32
-png_32bit_exp[16] =
-{
-   /* NOTE: the first entry is deliberately set to the maximum 32-bit value. */
-   4294967295U, 4112874773U, 3938502376U, 3771522796U, 3611622603U, 3458501653U,
-   3311872529U, 3171459999U, 3037000500U, 2908241642U, 2784941738U, 2666869345U,
-   2553802834U, 2445529972U, 2341847524U, 2242560872U
-};
-
-/* Adjustment table; provided to explain the numbers in the code below. */
-#if 0
-for (i=11;i>=0;--i){ print i, " ", (1 - e(-(2^i)/65536*l(2))) * 2^(32-i), "\n"}
-   11 44937.64284865548751208448
-   10 45180.98734845585101160448
-    9 45303.31936980687359311872
-    8 45364.65110595323018870784
-    7 45395.35850361789624614912
-    6 45410.72259715102037508096
-    5 45418.40724413220722311168
-    4 45422.25021786898173001728
-    3 45424.17186732298419044352
-    2 45425.13273269940811464704
-    1 45425.61317555035558641664
-    0 45425.85339951654943850496
-#endif
-
-static png_uint_32
-png_exp(png_fixed_point x)
-{
-   if (x > 0 && x <= 0xfffff) /* Else overflow or zero (underflow) */
-   {
-      /* Obtain a 4-bit approximation */
-      png_uint_32 e = png_32bit_exp[(x >> 12) & 0x0f];
-
-      /* Incorporate the low 12 bits - these decrease the returned value by
-       * multiplying by a number less than 1 if the bit is set.  The multiplier
-       * is determined by the above table and the shift. Notice that the values
-       * converge on 45426 and this is used to allow linear interpolation of the
-       * low bits.
-       */
-      if (x & 0x800)
-         e -= (((e >> 16) * 44938U) +  16U) >> 5;
-
-      if (x & 0x400)
-         e -= (((e >> 16) * 45181U) +  32U) >> 6;
-
-      if (x & 0x200)
-         e -= (((e >> 16) * 45303U) +  64U) >> 7;
-
-      if (x & 0x100)
-         e -= (((e >> 16) * 45365U) + 128U) >> 8;
-
-      if (x & 0x080)
-         e -= (((e >> 16) * 45395U) + 256U) >> 9;
-
-      if (x & 0x040)
-         e -= (((e >> 16) * 45410U) + 512U) >> 10;
-
-      /* And handle the low 6 bits in a single block. */
-      e -= (((e >> 16) * 355U * (x & 0x3fU)) + 256U) >> 9;
-
-      /* Handle the upper bits of x. */
-      e >>= x >> 16;
-      return e;
-   }
-
-   /* Check for overflow */
-   if (x <= 0)
-      return png_32bit_exp[0];
-
-   /* Else underflow */
-   return 0;
-}
-
-static png_byte
-png_exp8bit(png_fixed_point lg2)
-{
-   /* Get a 32-bit value: */
-   png_uint_32 x = png_exp(lg2);
-
-   /* Convert the 32-bit value to 0..255 by multiplying by 256-1. Note that the
-    * second, rounding, step can't overflow because of the first, subtraction,
-    * step.
-    */
-   x -= x >> 8;
-   return (png_byte)(((x + 0x7fffffU) >> 24) & 0xff);
-}
-
-#ifdef PNG_16BIT_SUPPORTED
-static png_uint_16
-png_exp16bit(png_fixed_point lg2)
-{
-   /* Get a 32-bit value: */
-   png_uint_32 x = png_exp(lg2);
-
-   /* Convert the 32-bit value to 0..65535 by multiplying by 65536-1: */
-   x -= x >> 16;
-   return (png_uint_16)((x + 32767U) >> 16);
-}
-#endif /* 16BIT */
-#endif /* FLOATING_ARITHMETIC */
-
-png_byte
-png_gamma_8bit_correct(unsigned int value, png_fixed_point gamma_val)
-{
-   if (value > 0 && value < 255)
-   {
-#     ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-         /* 'value' is unsigned, ANSI-C90 requires the compiler to correctly
-          * convert this to a floating point value.  This includes values that
-          * would overflow if 'value' were to be converted to 'int'.
-          *
-          * Apparently GCC, however, does an intermediate conversion to (int)
-          * on some (ARM) but not all (x86) platforms, possibly because of
-          * hardware FP limitations.  (E.g. if the hardware conversion always
-          * assumes the integer register contains a signed value.)  This results
-          * in ANSI-C undefined behavior for large values.
-          *
-          * Other implementations on the same machine might actually be ANSI-C90
-          * conformant and therefore compile spurious extra code for the large
-          * values.
-          *
-          * We can be reasonably sure that an unsigned to float conversion
-          * won't be faster than an int to float one.  Therefore this code
-          * assumes responsibility for the undefined behavior, which it knows
-          * can't happen because of the check above.
-          *
-          * Note the argument to this routine is an (unsigned int) because, on
-          * 16-bit platforms, it is assigned a value which might be out of
-          * range for an (int); that would result in undefined behavior in the
-          * caller if the *argument* ('value') were to be declared (int).
-          */
-         double r = floor(255*pow((int)/*SAFE*/value/255.,gamma_val*.00001)+.5);
-         return (png_byte)r;
-#     else
-         png_int_32 lg2 = png_log8bit(value);
-         png_fixed_point res;
-
-         if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0)
-            return png_exp8bit(res);
-
-         /* Overflow. */
-         value = 0;
-#     endif
-   }
-
-   return (png_byte)(value & 0xff);
-}
-
-#ifdef PNG_16BIT_SUPPORTED
-png_uint_16
-png_gamma_16bit_correct(unsigned int value, png_fixed_point gamma_val)
-{
-   if (value > 0 && value < 65535)
-   {
-# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-      /* The same (unsigned int)->(double) constraints apply here as above,
-       * however in this case the (unsigned int) to (int) conversion can
-       * overflow on an ANSI-C90 compliant system so the cast needs to ensure
-       * that this is not possible.
-       */
-      double r = floor(65535*pow((png_int_32)value/65535.,
-          gamma_val*.00001)+.5);
-      return (png_uint_16)r;
-# else
-      png_int_32 lg2 = png_log16bit(value);
-      png_fixed_point res;
-
-      if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0)
-         return png_exp16bit(res);
-
-      /* Overflow. */
-      value = 0;
-# endif
-   }
-
-   return (png_uint_16)value;
-}
-#endif /* 16BIT */
-
-/* This does the right thing based on the bit_depth field of the
- * png_struct, interpreting values as 8-bit or 16-bit.  While the result
- * is nominally a 16-bit value if bit depth is 8 then the result is
- * 8-bit (as are the arguments.)
- */
-png_uint_16 /* PRIVATE */
-png_gamma_correct(png_structrp png_ptr, unsigned int value,
-    png_fixed_point gamma_val)
-{
-   if (png_ptr->bit_depth == 8)
-      return png_gamma_8bit_correct(value, gamma_val);
-
-#ifdef PNG_16BIT_SUPPORTED
-   else
-      return png_gamma_16bit_correct(value, gamma_val);
-#else
-      /* should not reach this */
-      return 0;
-#endif /* 16BIT */
-}
-
-#ifdef PNG_16BIT_SUPPORTED
-/* Internal function to build a single 16-bit table - the table consists of
- * 'num' 256 entry subtables, where 'num' is determined by 'shift' - the amount
- * to shift the input values right (or 16-number_of_signifiant_bits).
- *
- * The caller is responsible for ensuring that the table gets cleaned up on
- * png_error (i.e. if one of the mallocs below fails) - i.e. the *table argument
- * should be somewhere that will be cleaned.
- */
-static void
-png_build_16bit_table(png_structrp png_ptr, png_uint_16pp *ptable,
-    unsigned int shift, png_fixed_point gamma_val)
-{
-   /* Various values derived from 'shift': */
-   unsigned int num = 1U << (8U - shift);
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   /* CSE the division and work round wacky GCC warnings (see the comments
-    * in png_gamma_8bit_correct for where these come from.)
-    */
-   double fmax = 1.0 / (((png_int_32)1 << (16U - shift)) - 1);
-#endif
-   unsigned int max = (1U << (16U - shift)) - 1U;
-   unsigned int max_by_2 = 1U << (15U - shift);
-   unsigned int i;
-
-   png_uint_16pp table = *ptable =
-       (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p)));
-
-   for (i = 0; i < num; i++)
-   {
-      png_uint_16p sub_table = table[i] =
-          (png_uint_16p)png_malloc(png_ptr, 256 * (sizeof (png_uint_16)));
-
-      /* The 'threshold' test is repeated here because it can arise for one of
-       * the 16-bit tables even if the others don't hit it.
-       */
-      if (png_gamma_significant(gamma_val) != 0)
-      {
-         /* The old code would overflow at the end and this would cause the
-          * 'pow' function to return a result >1, resulting in an
-          * arithmetic error.  This code follows the spec exactly; ig is
-          * the recovered input sample, it always has 8-16 bits.
-          *
-          * We want input * 65535/max, rounded, the arithmetic fits in 32
-          * bits (unsigned) so long as max <= 32767.
-          */
-         unsigned int j;
-         for (j = 0; j < 256; j++)
-         {
-            png_uint_32 ig = (j << (8-shift)) + i;
-#           ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-               /* Inline the 'max' scaling operation: */
-               /* See png_gamma_8bit_correct for why the cast to (int) is
-                * required here.
-                */
-               double d = floor(65535.*pow(ig*fmax, gamma_val*.00001)+.5);
-               sub_table[j] = (png_uint_16)d;
-#           else
-               if (shift != 0)
-                  ig = (ig * 65535U + max_by_2)/max;
-
-               sub_table[j] = png_gamma_16bit_correct(ig, gamma_val);
-#           endif
-         }
-      }
-      else
-      {
-         /* We must still build a table, but do it the fast way. */
-         unsigned int j;
-
-         for (j = 0; j < 256; j++)
-         {
-            png_uint_32 ig = (j << (8-shift)) + i;
-
-            if (shift != 0)
-               ig = (ig * 65535U + max_by_2)/max;
-
-            sub_table[j] = (png_uint_16)ig;
-         }
-      }
-   }
-}
-
-/* NOTE: this function expects the *inverse* of the overall gamma transformation
- * required.
- */
-static void
-png_build_16to8_table(png_structrp png_ptr, png_uint_16pp *ptable,
-    unsigned int shift, png_fixed_point gamma_val)
-{
-   unsigned int num = 1U << (8U - shift);
-   unsigned int max = (1U << (16U - shift))-1U;
-   unsigned int i;
-   png_uint_32 last;
-
-   png_uint_16pp table = *ptable =
-       (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p)));
-
-   /* 'num' is the number of tables and also the number of low bits of low
-    * bits of the input 16-bit value used to select a table.  Each table is
-    * itself indexed by the high 8 bits of the value.
-    */
-   for (i = 0; i < num; i++)
-      table[i] = (png_uint_16p)png_malloc(png_ptr,
-          256 * (sizeof (png_uint_16)));
-
-   /* 'gamma_val' is set to the reciprocal of the value calculated above, so
-    * pow(out,g) is an *input* value.  'last' is the last input value set.
-    *
-    * In the loop 'i' is used to find output values.  Since the output is
-    * 8-bit there are only 256 possible values.  The tables are set up to
-    * select the closest possible output value for each input by finding
-    * the input value at the boundary between each pair of output values
-    * and filling the table up to that boundary with the lower output
-    * value.
-    *
-    * The boundary values are 0.5,1.5..253.5,254.5.  Since these are 9-bit
-    * values the code below uses a 16-bit value in i; the values start at
-    * 128.5 (for 0.5) and step by 257, for a total of 254 values (the last
-    * entries are filled with 255).  Start i at 128 and fill all 'last'
-    * table entries <= 'max'
-    */
-   last = 0;
-   for (i = 0; i < 255; ++i) /* 8-bit output value */
-   {
-      /* Find the corresponding maximum input value */
-      png_uint_16 out = (png_uint_16)(i * 257U); /* 16-bit output value */
-
-      /* Find the boundary value in 16 bits: */
-      png_uint_32 bound = png_gamma_16bit_correct(out+128U, gamma_val);
-
-      /* Adjust (round) to (16-shift) bits: */
-      bound = (bound * max + 32768U)/65535U + 1U;
-
-      while (last < bound)
-      {
-         table[last & (0xffU >> shift)][last >> (8U - shift)] = out;
-         last++;
-      }
-   }
-
-   /* And fill in the final entries. */
-   while (last < (num << 8))
-   {
-      table[last & (0xff >> shift)][last >> (8U - shift)] = 65535U;
-      last++;
-   }
-}
-#endif /* 16BIT */
-
-/* Build a single 8-bit table: same as the 16-bit case but much simpler (and
- * typically much faster).  Note that libpng currently does no sBIT processing
- * (apparently contrary to the spec) so a 256-entry table is always generated.
- */
-static void
-png_build_8bit_table(png_structrp png_ptr, png_bytepp ptable,
-    png_fixed_point gamma_val)
-{
-   unsigned int i;
-   png_bytep table = *ptable = (png_bytep)png_malloc(png_ptr, 256);
-
-   if (png_gamma_significant(gamma_val) != 0)
-      for (i=0; i<256; i++)
-         table[i] = png_gamma_8bit_correct(i, gamma_val);
-
-   else
-      for (i=0; i<256; ++i)
-         table[i] = (png_byte)(i & 0xff);
-}
-
-/* Used from png_read_destroy and below to release the memory used by the gamma
- * tables.
- */
-void /* PRIVATE */
-png_destroy_gamma_table(png_structrp png_ptr)
-{
-   png_free(png_ptr, png_ptr->gamma_table);
-   png_ptr->gamma_table = NULL;
-
-#ifdef PNG_16BIT_SUPPORTED
-   if (png_ptr->gamma_16_table != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_table[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_table);
-   png_ptr->gamma_16_table = NULL;
-   }
-#endif /* 16BIT */
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-   png_free(png_ptr, png_ptr->gamma_from_1);
-   png_ptr->gamma_from_1 = NULL;
-   png_free(png_ptr, png_ptr->gamma_to_1);
-   png_ptr->gamma_to_1 = NULL;
-
-#ifdef PNG_16BIT_SUPPORTED
-   if (png_ptr->gamma_16_from_1 != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_from_1[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_from_1);
-   png_ptr->gamma_16_from_1 = NULL;
-   }
-   if (png_ptr->gamma_16_to_1 != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_to_1[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_to_1);
-   png_ptr->gamma_16_to_1 = NULL;
-   }
-#endif /* 16BIT */
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-}
-
-/* We build the 8- or 16-bit gamma tables here.  Note that for 16-bit
- * tables, we don't make a full table if we are reducing to 8-bit in
- * the future.  Note also how the gamma_16 tables are segmented so that
- * we don't need to allocate > 64K chunks for a full 16-bit table.
- */
-void /* PRIVATE */
-png_build_gamma_table(png_structrp png_ptr, int bit_depth)
-{
-   png_debug(1, "in png_build_gamma_table");
-
-   /* Remove any existing table; this copes with multiple calls to
-    * png_read_update_info. The warning is because building the gamma tables
-    * multiple times is a performance hit - it's harmless but the ability to
-    * call png_read_update_info() multiple times is new in 1.5.6 so it seems
-    * sensible to warn if the app introduces such a hit.
-    */
-   if (png_ptr->gamma_table != NULL || png_ptr->gamma_16_table != NULL)
-   {
-      png_warning(png_ptr, "gamma table being rebuilt");
-      png_destroy_gamma_table(png_ptr);
-   }
-
-   if (bit_depth <= 8)
-   {
-      png_build_8bit_table(png_ptr, &png_ptr->gamma_table,
-          png_ptr->screen_gamma > 0 ?
-          png_reciprocal2(png_ptr->colorspace.gamma,
-          png_ptr->screen_gamma) : PNG_FP_1);
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-      if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0)
-      {
-         png_build_8bit_table(png_ptr, &png_ptr->gamma_to_1,
-             png_reciprocal(png_ptr->colorspace.gamma));
-
-         png_build_8bit_table(png_ptr, &png_ptr->gamma_from_1,
-             png_ptr->screen_gamma > 0 ?
-             png_reciprocal(png_ptr->screen_gamma) :
-             png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */);
-      }
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-   }
-#ifdef PNG_16BIT_SUPPORTED
-   else
-   {
-      png_byte shift, sig_bit;
-
-      if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      {
-         sig_bit = png_ptr->sig_bit.red;
-
-         if (png_ptr->sig_bit.green > sig_bit)
-            sig_bit = png_ptr->sig_bit.green;
-
-         if (png_ptr->sig_bit.blue > sig_bit)
-            sig_bit = png_ptr->sig_bit.blue;
-      }
-      else
-         sig_bit = png_ptr->sig_bit.gray;
-
-      /* 16-bit gamma code uses this equation:
-       *
-       *   ov = table[(iv & 0xff) >> gamma_shift][iv >> 8]
-       *
-       * Where 'iv' is the input color value and 'ov' is the output value -
-       * pow(iv, gamma).
-       *
-       * Thus the gamma table consists of up to 256 256-entry tables.  The table
-       * is selected by the (8-gamma_shift) most significant of the low 8 bits
-       * of the color value then indexed by the upper 8 bits:
-       *
-       *   table[low bits][high 8 bits]
-       *
-       * So the table 'n' corresponds to all those 'iv' of:
-       *
-       *   <all high 8-bit values><n << gamma_shift>..<(n+1 << gamma_shift)-1>
-       *
-       */
-      if (sig_bit > 0 && sig_bit < 16U)
-         /* shift == insignificant bits */
-         shift = (png_byte)((16U - sig_bit) & 0xff);
-
-      else
-         shift = 0; /* keep all 16 bits */
-
-      if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0)
-      {
-         /* PNG_MAX_GAMMA_8 is the number of bits to keep - effectively
-          * the significant bits in the *input* when the output will
-          * eventually be 8 bits.  By default it is 11.
-          */
-         if (shift < (16U - PNG_MAX_GAMMA_8))
-            shift = (16U - PNG_MAX_GAMMA_8);
-      }
-
-      if (shift > 8U)
-         shift = 8U; /* Guarantees at least one table! */
-
-      png_ptr->gamma_shift = shift;
-
-      /* NOTE: prior to 1.5.4 this test used to include PNG_BACKGROUND (now
-       * PNG_COMPOSE).  This effectively smashed the background calculation for
-       * 16-bit output because the 8-bit table assumes the result will be
-       * reduced to 8 bits.
-       */
-      if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0)
-          png_build_16to8_table(png_ptr, &png_ptr->gamma_16_table, shift,
-          png_ptr->screen_gamma > 0 ? png_product2(png_ptr->colorspace.gamma,
-          png_ptr->screen_gamma) : PNG_FP_1);
-
-      else
-          png_build_16bit_table(png_ptr, &png_ptr->gamma_16_table, shift,
-          png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->colorspace.gamma,
-          png_ptr->screen_gamma) : PNG_FP_1);
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-      if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0)
-      {
-         png_build_16bit_table(png_ptr, &png_ptr->gamma_16_to_1, shift,
-             png_reciprocal(png_ptr->colorspace.gamma));
-
-         /* Notice that the '16 from 1' table should be full precision, however
-          * the lookup on this table still uses gamma_shift, so it can't be.
-          * TODO: fix this.
-          */
-         png_build_16bit_table(png_ptr, &png_ptr->gamma_16_from_1, shift,
-             png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) :
-             png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */);
-      }
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-   }
-#endif /* 16BIT */
-}
-#endif /* READ_GAMMA */
-
-/* HARDWARE OR SOFTWARE OPTION SUPPORT */
-#ifdef PNG_SET_OPTION_SUPPORTED
-int PNGAPI
-png_set_option(png_structrp png_ptr, int option, int onoff)
-{
-   if (png_ptr != NULL && option >= 0 && option < PNG_OPTION_NEXT &&
-      (option & 1) == 0)
-   {
-      png_uint_32 mask = 3U << option;
-      png_uint_32 setting = (2U + (onoff != 0)) << option;
-      png_uint_32 current = png_ptr->options;
-
-      png_ptr->options = (png_uint_32)((current & ~mask) | setting);
-
-      return (int)(current & mask) >> option;
-   }
-
-   return PNG_OPTION_INVALID;
-}
-#endif
-
-/* sRGB support */
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
-   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-/* sRGB conversion tables; these are machine generated with the code in
- * contrib/tools/makesRGB.c.  The actual sRGB transfer curve defined in the
- * specification (see the article at https://en.wikipedia.org/wiki/SRGB)
- * is used, not the gamma=1/2.2 approximation use elsewhere in libpng.
- * The sRGB to linear table is exact (to the nearest 16-bit linear fraction).
- * The inverse (linear to sRGB) table has accuracies as follows:
- *
- * For all possible (255*65535+1) input values:
- *
- *    error: -0.515566 - 0.625971, 79441 (0.475369%) of readings inexact
- *
- * For the input values corresponding to the 65536 16-bit values:
- *
- *    error: -0.513727 - 0.607759, 308 (0.469978%) of readings inexact
- *
- * In all cases the inexact readings are only off by one.
- */
-
-#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
-/* The convert-to-sRGB table is only currently required for read. */
-const png_uint_16 png_sRGB_table[256] =
-{
-   0,20,40,60,80,99,119,139,
-   159,179,199,219,241,264,288,313,
-   340,367,396,427,458,491,526,562,
-   599,637,677,718,761,805,851,898,
-   947,997,1048,1101,1156,1212,1270,1330,
-   1391,1453,1517,1583,1651,1720,1790,1863,
-   1937,2013,2090,2170,2250,2333,2418,2504,
-   2592,2681,2773,2866,2961,3058,3157,3258,
-   3360,3464,3570,3678,3788,3900,4014,4129,
-   4247,4366,4488,4611,4736,4864,4993,5124,
-   5257,5392,5530,5669,5810,5953,6099,6246,
-   6395,6547,6700,6856,7014,7174,7335,7500,
-   7666,7834,8004,8177,8352,8528,8708,8889,
-   9072,9258,9445,9635,9828,10022,10219,10417,
-   10619,10822,11028,11235,11446,11658,11873,12090,
-   12309,12530,12754,12980,13209,13440,13673,13909,
-   14146,14387,14629,14874,15122,15371,15623,15878,
-   16135,16394,16656,16920,17187,17456,17727,18001,
-   18277,18556,18837,19121,19407,19696,19987,20281,
-   20577,20876,21177,21481,21787,22096,22407,22721,
-   23038,23357,23678,24002,24329,24658,24990,25325,
-   25662,26001,26344,26688,27036,27386,27739,28094,
-   28452,28813,29176,29542,29911,30282,30656,31033,
-   31412,31794,32179,32567,32957,33350,33745,34143,
-   34544,34948,35355,35764,36176,36591,37008,37429,
-   37852,38278,38706,39138,39572,40009,40449,40891,
-   41337,41785,42236,42690,43147,43606,44069,44534,
-   45002,45473,45947,46423,46903,47385,47871,48359,
-   48850,49344,49841,50341,50844,51349,51858,52369,
-   52884,53401,53921,54445,54971,55500,56032,56567,
-   57105,57646,58190,58737,59287,59840,60396,60955,
-   61517,62082,62650,63221,63795,64372,64952,65535
-};
-#endif /* SIMPLIFIED_READ */
-
-/* The base/delta tables are required for both read and write (but currently
- * only the simplified versions.)
- */
-const png_uint_16 png_sRGB_base[512] =
-{
-   128,1782,3383,4644,5675,6564,7357,8074,
-   8732,9346,9921,10463,10977,11466,11935,12384,
-   12816,13233,13634,14024,14402,14769,15125,15473,
-   15812,16142,16466,16781,17090,17393,17690,17981,
-   18266,18546,18822,19093,19359,19621,19879,20133,
-   20383,20630,20873,21113,21349,21583,21813,22041,
-   22265,22487,22707,22923,23138,23350,23559,23767,
-   23972,24175,24376,24575,24772,24967,25160,25352,
-   25542,25730,25916,26101,26284,26465,26645,26823,
-   27000,27176,27350,27523,27695,27865,28034,28201,
-   28368,28533,28697,28860,29021,29182,29341,29500,
-   29657,29813,29969,30123,30276,30429,30580,30730,
-   30880,31028,31176,31323,31469,31614,31758,31902,
-   32045,32186,32327,32468,32607,32746,32884,33021,
-   33158,33294,33429,33564,33697,33831,33963,34095,
-   34226,34357,34486,34616,34744,34873,35000,35127,
-   35253,35379,35504,35629,35753,35876,35999,36122,
-   36244,36365,36486,36606,36726,36845,36964,37083,
-   37201,37318,37435,37551,37668,37783,37898,38013,
-   38127,38241,38354,38467,38580,38692,38803,38915,
-   39026,39136,39246,39356,39465,39574,39682,39790,
-   39898,40005,40112,40219,40325,40431,40537,40642,
-   40747,40851,40955,41059,41163,41266,41369,41471,
-   41573,41675,41777,41878,41979,42079,42179,42279,
-   42379,42478,42577,42676,42775,42873,42971,43068,
-   43165,43262,43359,43456,43552,43648,43743,43839,
-   43934,44028,44123,44217,44311,44405,44499,44592,
-   44685,44778,44870,44962,45054,45146,45238,45329,
-   45420,45511,45601,45692,45782,45872,45961,46051,
-   46140,46229,46318,46406,46494,46583,46670,46758,
-   46846,46933,47020,47107,47193,47280,47366,47452,
-   47538,47623,47709,47794,47879,47964,48048,48133,
-   48217,48301,48385,48468,48552,48635,48718,48801,
-   48884,48966,49048,49131,49213,49294,49376,49458,
-   49539,49620,49701,49782,49862,49943,50023,50103,
-   50183,50263,50342,50422,50501,50580,50659,50738,
-   50816,50895,50973,51051,51129,51207,51285,51362,
-   51439,51517,51594,51671,51747,51824,51900,51977,
-   52053,52129,52205,52280,52356,52432,52507,52582,
-   52657,52732,52807,52881,52956,53030,53104,53178,
-   53252,53326,53400,53473,53546,53620,53693,53766,
-   53839,53911,53984,54056,54129,54201,54273,54345,
-   54417,54489,54560,54632,54703,54774,54845,54916,
-   54987,55058,55129,55199,55269,55340,55410,55480,
-   55550,55620,55689,55759,55828,55898,55967,56036,
-   56105,56174,56243,56311,56380,56448,56517,56585,
-   56653,56721,56789,56857,56924,56992,57059,57127,
-   57194,57261,57328,57395,57462,57529,57595,57662,
-   57728,57795,57861,57927,57993,58059,58125,58191,
-   58256,58322,58387,58453,58518,58583,58648,58713,
-   58778,58843,58908,58972,59037,59101,59165,59230,
-   59294,59358,59422,59486,59549,59613,59677,59740,
-   59804,59867,59930,59993,60056,60119,60182,60245,
-   60308,60370,60433,60495,60558,60620,60682,60744,
-   60806,60868,60930,60992,61054,61115,61177,61238,
-   61300,61361,61422,61483,61544,61605,61666,61727,
-   61788,61848,61909,61969,62030,62090,62150,62211,
-   62271,62331,62391,62450,62510,62570,62630,62689,
-   62749,62808,62867,62927,62986,63045,63104,63163,
-   63222,63281,63340,63398,63457,63515,63574,63632,
-   63691,63749,63807,63865,63923,63981,64039,64097,
-   64155,64212,64270,64328,64385,64443,64500,64557,
-   64614,64672,64729,64786,64843,64900,64956,65013,
-   65070,65126,65183,65239,65296,65352,65409,65465
-};
-
-const png_byte png_sRGB_delta[512] =
-{
-   207,201,158,129,113,100,90,82,77,72,68,64,61,59,56,54,
-   52,50,49,47,46,45,43,42,41,40,39,39,38,37,36,36,
-   35,34,34,33,33,32,32,31,31,30,30,30,29,29,28,28,
-   28,27,27,27,27,26,26,26,25,25,25,25,24,24,24,24,
-   23,23,23,23,23,22,22,22,22,22,22,21,21,21,21,21,
-   21,20,20,20,20,20,20,20,20,19,19,19,19,19,19,19,
-   19,18,18,18,18,18,18,18,18,18,18,17,17,17,17,17,
-   17,17,17,17,17,17,16,16,16,16,16,16,16,16,16,16,
-   16,16,16,16,15,15,15,15,15,15,15,15,15,15,15,15,
-   15,15,15,15,14,14,14,14,14,14,14,14,14,14,14,14,
-   14,14,14,14,14,14,14,13,13,13,13,13,13,13,13,13,
-   13,13,13,13,13,13,13,13,13,13,13,13,13,13,12,12,
-   12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
-   12,12,12,12,12,12,12,12,12,12,12,12,11,11,11,11,
-   11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
-   11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
-   11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
-   10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
-   10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
-   10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
-#endif /* SIMPLIFIED READ/WRITE sRGB support */
-
-/* SIMPLIFIED READ/WRITE SUPPORT */
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
-   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-static int
-png_image_free_function(png_voidp argument)
-{
-   png_imagep image = png_voidcast(png_imagep, argument);
-   png_controlp cp = image->opaque;
-   png_control c;
-
-   /* Double check that we have a png_ptr - it should be impossible to get here
-    * without one.
-    */
-   if (cp->png_ptr == NULL)
-      return 0;
-
-   /* First free any data held in the control structure. */
-#  ifdef PNG_STDIO_SUPPORTED
-      if (cp->owned_file != 0)
-      {
-         FILE *fp = png_voidcast(FILE*, cp->png_ptr->io_ptr);
-         cp->owned_file = 0;
-
-         /* Ignore errors here. */
-         if (fp != NULL)
-         {
-            cp->png_ptr->io_ptr = NULL;
-            (void)fclose(fp);
-         }
-      }
-#  endif
-
-   /* Copy the control structure so that the original, allocated, version can be
-    * safely freed.  Notice that a png_error here stops the remainder of the
-    * cleanup, but this is probably fine because that would indicate bad memory
-    * problems anyway.
-    */
-   c = *cp;
-   image->opaque = &c;
-   png_free(c.png_ptr, cp);
-
-   /* Then the structures, calling the correct API. */
-   if (c.for_write != 0)
-   {
-#     ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
-         png_destroy_write_struct(&c.png_ptr, &c.info_ptr);
-#     else
-         png_error(c.png_ptr, "simplified write not supported");
-#     endif
-   }
-   else
-   {
-#     ifdef PNG_SIMPLIFIED_READ_SUPPORTED
-         png_destroy_read_struct(&c.png_ptr, &c.info_ptr, NULL);
-#     else
-         png_error(c.png_ptr, "simplified read not supported");
-#     endif
-   }
-
-   /* Success. */
-   return 1;
-}
-
-void PNGAPI
-png_image_free(png_imagep image)
-{
-   /* Safely call the real function, but only if doing so is safe at this point
-    * (if not inside an error handling context).  Otherwise assume
-    * png_safe_execute will call this API after the return.
-    */
-   if (image != NULL && image->opaque != NULL &&
-      image->opaque->error_buf == NULL)
-   {
-      png_image_free_function(image);
-      image->opaque = NULL;
-   }
-}
-
-int /* PRIVATE */
-png_image_error(png_imagep image, png_const_charp error_message)
-{
-   /* Utility to log an error. */
-   png_safecat(image->message, (sizeof image->message), 0, error_message);
-   image->warning_or_error |= PNG_IMAGE_ERROR;
-   png_image_free(image);
-   return 0;
-}
-
-#endif /* SIMPLIFIED READ/WRITE */
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pngdebug.h b/dep/libpng/src/pngdebug.h
deleted file mode 100644
index 00d5a4569..000000000
--- a/dep/libpng/src/pngdebug.h
+++ /dev/null
@@ -1,153 +0,0 @@
-
-/* pngdebug.h - Debugging macros for libpng, also used in pngtest.c
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2013 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* Define PNG_DEBUG at compile time for debugging information.  Higher
- * numbers for PNG_DEBUG mean more debugging information.  This has
- * only been added since version 0.95 so it is not implemented throughout
- * libpng yet, but more support will be added as needed.
- *
- * png_debug[1-2]?(level, message ,arg{0-2})
- *   Expands to a statement (either a simple expression or a compound
- *   do..while(0) statement) that outputs a message with parameter
- *   substitution if PNG_DEBUG is defined to 2 or more.  If PNG_DEBUG
- *   is undefined, 0 or 1 every png_debug expands to a simple expression
- *   (actually ((void)0)).
- *
- *   level: level of detail of message, starting at 0.  A level 'n'
- *          message is preceded by 'n' 3-space indentations (not implemented
- *          on Microsoft compilers unless PNG_DEBUG_FILE is also
- *          defined, to allow debug DLL compilation with no standard IO).
- *   message: a printf(3) style text string.  A trailing '\n' is added
- *            to the message.
- *   arg: 0 to 2 arguments for printf(3) style substitution in message.
- */
-#ifndef PNGDEBUG_H
-#define PNGDEBUG_H
-/* These settings control the formatting of messages in png.c and pngerror.c */
-/* Moved to pngdebug.h at 1.5.0 */
-#  ifndef PNG_LITERAL_SHARP
-#    define PNG_LITERAL_SHARP 0x23
-#  endif
-#  ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET
-#    define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b
-#  endif
-#  ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET
-#    define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d
-#  endif
-#  ifndef PNG_STRING_NEWLINE
-#    define PNG_STRING_NEWLINE "\n"
-#  endif
-
-#ifdef PNG_DEBUG
-#  if (PNG_DEBUG > 0)
-#    if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER)
-#      include <crtdbg.h>
-#      if (PNG_DEBUG > 1)
-#        ifndef _DEBUG
-#          define _DEBUG
-#        endif
-#        ifndef png_debug
-#          define png_debug(l,m)  _RPT0(_CRT_WARN,m PNG_STRING_NEWLINE)
-#        endif
-#        ifndef png_debug1
-#          define png_debug1(l,m,p1)  _RPT1(_CRT_WARN,m PNG_STRING_NEWLINE,p1)
-#        endif
-#        ifndef png_debug2
-#          define png_debug2(l,m,p1,p2) \
-             _RPT2(_CRT_WARN,m PNG_STRING_NEWLINE,p1,p2)
-#        endif
-#      endif
-#    else /* PNG_DEBUG_FILE || !_MSC_VER */
-#      ifndef PNG_STDIO_SUPPORTED
-#        include <stdio.h> /* not included yet */
-#      endif
-#      ifndef PNG_DEBUG_FILE
-#        define PNG_DEBUG_FILE stderr
-#      endif /* PNG_DEBUG_FILE */
-
-#      if (PNG_DEBUG > 1)
-#        ifdef __STDC__
-#          ifndef png_debug
-#            define png_debug(l,m) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
-         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : "")))); \
-       } while (0)
-#          endif
-#          ifndef png_debug1
-#            define png_debug1(l,m,p1) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
-         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : ""))),p1); \
-       } while (0)
-#          endif
-#          ifndef png_debug2
-#            define png_debug2(l,m,p1,p2) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
-         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : ""))),p1,p2);\
-       } while (0)
-#          endif
-#        else /* __STDC __ */
-#          ifndef png_debug
-#            define png_debug(l,m) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format); \
-       } while (0)
-#          endif
-#          ifndef png_debug1
-#            define png_debug1(l,m,p1) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format,p1); \
-       } while (0)
-#          endif
-#          ifndef png_debug2
-#            define png_debug2(l,m,p1,p2) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format,p1,p2); \
-       } while (0)
-#          endif
-#        endif /* __STDC __ */
-#      endif /* (PNG_DEBUG > 1) */
-
-#    endif /* _MSC_VER */
-#  endif /* (PNG_DEBUG > 0) */
-#endif /* PNG_DEBUG */
-#ifndef png_debug
-#  define png_debug(l, m) ((void)0)
-#endif
-#ifndef png_debug1
-#  define png_debug1(l, m, p1) ((void)0)
-#endif
-#ifndef png_debug2
-#  define png_debug2(l, m, p1, p2) ((void)0)
-#endif
-#endif /* PNGDEBUG_H */
diff --git a/dep/libpng/src/pngerror.c b/dep/libpng/src/pngerror.c
deleted file mode 100644
index 29ebda794..000000000
--- a/dep/libpng/src/pngerror.c
+++ /dev/null
@@ -1,957 +0,0 @@
-
-/* pngerror.c - stub functions for i/o and memory allocation
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2017 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file provides a location for all error handling.  Users who
- * need special error handling are expected to write replacement functions
- * and use png_set_error_fn() to use those functions.  See the instructions
- * at each function.
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-static PNG_FUNCTION(void, png_default_error,PNGARG((png_const_structrp png_ptr,
-    png_const_charp error_message)),PNG_NORETURN);
-
-#ifdef PNG_WARNINGS_SUPPORTED
-static void /* PRIVATE */
-png_default_warning PNGARG((png_const_structrp png_ptr,
-    png_const_charp warning_message));
-#endif /* WARNINGS */
-
-/* This function is called whenever there is a fatal error.  This function
- * should not be changed.  If there is a need to handle errors differently,
- * you should supply a replacement error function and use png_set_error_fn()
- * to replace the error function at run-time.
- */
-#ifdef PNG_ERROR_TEXT_SUPPORTED
-PNG_FUNCTION(void,PNGAPI
-png_error,(png_const_structrp png_ptr, png_const_charp error_message),
-    PNG_NORETURN)
-{
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   char msg[16];
-   if (png_ptr != NULL)
-   {
-      if ((png_ptr->flags &
-         (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0)
-      {
-         if (*error_message == PNG_LITERAL_SHARP)
-         {
-            /* Strip "#nnnn " from beginning of error message. */
-            int offset;
-            for (offset = 1; offset<15; offset++)
-               if (error_message[offset] == ' ')
-                  break;
-
-            if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0)
-            {
-               int i;
-               for (i = 0; i < offset - 1; i++)
-                  msg[i] = error_message[i + 1];
-               msg[i - 1] = '\0';
-               error_message = msg;
-            }
-
-            else
-               error_message += offset;
-         }
-
-         else
-         {
-            if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0)
-            {
-               msg[0] = '0';
-               msg[1] = '\0';
-               error_message = msg;
-            }
-         }
-      }
-   }
-#endif
-   if (png_ptr != NULL && png_ptr->error_fn != NULL)
-      (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr),
-          error_message);
-
-   /* If the custom handler doesn't exist, or if it returns,
-      use the default handler, which will not return. */
-   png_default_error(png_ptr, error_message);
-}
-#else
-PNG_FUNCTION(void,PNGAPI
-png_err,(png_const_structrp png_ptr),PNG_NORETURN)
-{
-   /* Prior to 1.5.2 the error_fn received a NULL pointer, expressed
-    * erroneously as '\0', instead of the empty string "".  This was
-    * apparently an error, introduced in libpng-1.2.20, and png_default_error
-    * will crash in this case.
-    */
-   if (png_ptr != NULL && png_ptr->error_fn != NULL)
-      (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr), "");
-
-   /* If the custom handler doesn't exist, or if it returns,
-      use the default handler, which will not return. */
-   png_default_error(png_ptr, "");
-}
-#endif /* ERROR_TEXT */
-
-/* Utility to safely appends strings to a buffer.  This never errors out so
- * error checking is not required in the caller.
- */
-size_t
-png_safecat(png_charp buffer, size_t bufsize, size_t pos,
-    png_const_charp string)
-{
-   if (buffer != NULL && pos < bufsize)
-   {
-      if (string != NULL)
-         while (*string != '\0' && pos < bufsize-1)
-           buffer[pos++] = *string++;
-
-      buffer[pos] = '\0';
-   }
-
-   return pos;
-}
-
-#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED)
-/* Utility to dump an unsigned value into a buffer, given a start pointer and
- * and end pointer (which should point just *beyond* the end of the buffer!)
- * Returns the pointer to the start of the formatted string.
- */
-png_charp
-png_format_number(png_const_charp start, png_charp end, int format,
-    png_alloc_size_t number)
-{
-   int count = 0;    /* number of digits output */
-   int mincount = 1; /* minimum number required */
-   int output = 0;   /* digit output (for the fixed point format) */
-
-   *--end = '\0';
-
-   /* This is written so that the loop always runs at least once, even with
-    * number zero.
-    */
-   while (end > start && (number != 0 || count < mincount))
-   {
-
-      static const char digits[] = "0123456789ABCDEF";
-
-      switch (format)
-      {
-         case PNG_NUMBER_FORMAT_fixed:
-            /* Needs five digits (the fraction) */
-            mincount = 5;
-            if (output != 0 || number % 10 != 0)
-            {
-               *--end = digits[number % 10];
-               output = 1;
-            }
-            number /= 10;
-            break;
-
-         case PNG_NUMBER_FORMAT_02u:
-            /* Expects at least 2 digits. */
-            mincount = 2;
-            /* FALLTHROUGH */
-
-         case PNG_NUMBER_FORMAT_u:
-            *--end = digits[number % 10];
-            number /= 10;
-            break;
-
-         case PNG_NUMBER_FORMAT_02x:
-            /* This format expects at least two digits */
-            mincount = 2;
-            /* FALLTHROUGH */
-
-         case PNG_NUMBER_FORMAT_x:
-            *--end = digits[number & 0xf];
-            number >>= 4;
-            break;
-
-         default: /* an error */
-            number = 0;
-            break;
-      }
-
-      /* Keep track of the number of digits added */
-      ++count;
-
-      /* Float a fixed number here: */
-      if ((format == PNG_NUMBER_FORMAT_fixed) && (count == 5) && (end > start))
-      {
-         /* End of the fraction, but maybe nothing was output?  In that case
-          * drop the decimal point.  If the number is a true zero handle that
-          * here.
-          */
-         if (output != 0)
-            *--end = '.';
-         else if (number == 0) /* and !output */
-            *--end = '0';
-      }
-   }
-
-   return end;
-}
-#endif
-
-#ifdef PNG_WARNINGS_SUPPORTED
-/* This function is called whenever there is a non-fatal error.  This function
- * should not be changed.  If there is a need to handle warnings differently,
- * you should supply a replacement warning function and use
- * png_set_error_fn() to replace the warning function at run-time.
- */
-void PNGAPI
-png_warning(png_const_structrp png_ptr, png_const_charp warning_message)
-{
-   int offset = 0;
-   if (png_ptr != NULL)
-   {
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   if ((png_ptr->flags &
-       (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0)
-#endif
-      {
-         if (*warning_message == PNG_LITERAL_SHARP)
-         {
-            for (offset = 1; offset < 15; offset++)
-               if (warning_message[offset] == ' ')
-                  break;
-         }
-      }
-   }
-   if (png_ptr != NULL && png_ptr->warning_fn != NULL)
-      (*(png_ptr->warning_fn))(png_constcast(png_structrp,png_ptr),
-          warning_message + offset);
-   else
-      png_default_warning(png_ptr, warning_message + offset);
-}
-
-/* These functions support 'formatted' warning messages with up to
- * PNG_WARNING_PARAMETER_COUNT parameters.  In the format string the parameter
- * is introduced by @<number>, where 'number' starts at 1.  This follows the
- * standard established by X/Open for internationalizable error messages.
- */
-void
-png_warning_parameter(png_warning_parameters p, int number,
-    png_const_charp string)
-{
-   if (number > 0 && number <= PNG_WARNING_PARAMETER_COUNT)
-      (void)png_safecat(p[number-1], (sizeof p[number-1]), 0, string);
-}
-
-void
-png_warning_parameter_unsigned(png_warning_parameters p, int number, int format,
-    png_alloc_size_t value)
-{
-   char buffer[PNG_NUMBER_BUFFER_SIZE] = {0};
-   png_warning_parameter(p, number, PNG_FORMAT_NUMBER(buffer, format, value));
-}
-
-void
-png_warning_parameter_signed(png_warning_parameters p, int number, int format,
-    png_int_32 value)
-{
-   png_alloc_size_t u;
-   png_charp str;
-   char buffer[PNG_NUMBER_BUFFER_SIZE] = {0};
-
-   /* Avoid overflow by doing the negate in a png_alloc_size_t: */
-   u = (png_alloc_size_t)value;
-   if (value < 0)
-      u = ~u + 1;
-
-   str = PNG_FORMAT_NUMBER(buffer, format, u);
-
-   if (value < 0 && str > buffer)
-      *--str = '-';
-
-   png_warning_parameter(p, number, str);
-}
-
-void
-png_formatted_warning(png_const_structrp png_ptr, png_warning_parameters p,
-    png_const_charp message)
-{
-   /* The internal buffer is just 192 bytes - enough for all our messages,
-    * overflow doesn't happen because this code checks!  If someone figures
-    * out how to send us a message longer than 192 bytes, all that will
-    * happen is that the message will be truncated appropriately.
-    */
-   size_t i = 0; /* Index in the msg[] buffer: */
-   char msg[192];
-
-   /* Each iteration through the following loop writes at most one character
-    * to msg[i++] then returns here to validate that there is still space for
-    * the trailing '\0'.  It may (in the case of a parameter) read more than
-    * one character from message[]; it must check for '\0' and continue to the
-    * test if it finds the end of string.
-    */
-   while (i<(sizeof msg)-1 && *message != '\0')
-   {
-      /* '@' at end of string is now just printed (previously it was skipped);
-       * it is an error in the calling code to terminate the string with @.
-       */
-      if (p != NULL && *message == '@' && message[1] != '\0')
-      {
-         int parameter_char = *++message; /* Consume the '@' */
-         static const char valid_parameters[] = "123456789";
-         int parameter = 0;
-
-         /* Search for the parameter digit, the index in the string is the
-          * parameter to use.
-          */
-         while (valid_parameters[parameter] != parameter_char &&
-            valid_parameters[parameter] != '\0')
-            ++parameter;
-
-         /* If the parameter digit is out of range it will just get printed. */
-         if (parameter < PNG_WARNING_PARAMETER_COUNT)
-         {
-            /* Append this parameter */
-            png_const_charp parm = p[parameter];
-            png_const_charp pend = p[parameter] + (sizeof p[parameter]);
-
-            /* No need to copy the trailing '\0' here, but there is no guarantee
-             * that parm[] has been initialized, so there is no guarantee of a
-             * trailing '\0':
-             */
-            while (i<(sizeof msg)-1 && *parm != '\0' && parm < pend)
-               msg[i++] = *parm++;
-
-            /* Consume the parameter digit too: */
-            ++message;
-            continue;
-         }
-
-         /* else not a parameter and there is a character after the @ sign; just
-          * copy that.  This is known not to be '\0' because of the test above.
-          */
-      }
-
-      /* At this point *message can't be '\0', even in the bad parameter case
-       * above where there is a lone '@' at the end of the message string.
-       */
-      msg[i++] = *message++;
-   }
-
-   /* i is always less than (sizeof msg), so: */
-   msg[i] = '\0';
-
-   /* And this is the formatted message. It may be larger than
-    * PNG_MAX_ERROR_TEXT, but that is only used for 'chunk' errors and these
-    * are not (currently) formatted.
-    */
-   png_warning(png_ptr, msg);
-}
-#endif /* WARNINGS */
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-void PNGAPI
-png_benign_error(png_const_structrp png_ptr, png_const_charp error_message)
-{
-   if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0)
-   {
-#     ifdef PNG_READ_SUPPORTED
-         if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
-            png_ptr->chunk_name != 0)
-            png_chunk_warning(png_ptr, error_message);
-         else
-#     endif
-      png_warning(png_ptr, error_message);
-   }
-
-   else
-   {
-#     ifdef PNG_READ_SUPPORTED
-         if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
-            png_ptr->chunk_name != 0)
-            png_chunk_error(png_ptr, error_message);
-         else
-#     endif
-      png_error(png_ptr, error_message);
-   }
-
-#  ifndef PNG_ERROR_TEXT_SUPPORTED
-      PNG_UNUSED(error_message)
-#  endif
-}
-
-void /* PRIVATE */
-png_app_warning(png_const_structrp png_ptr, png_const_charp error_message)
-{
-   if ((png_ptr->flags & PNG_FLAG_APP_WARNINGS_WARN) != 0)
-      png_warning(png_ptr, error_message);
-   else
-      png_error(png_ptr, error_message);
-
-#  ifndef PNG_ERROR_TEXT_SUPPORTED
-      PNG_UNUSED(error_message)
-#  endif
-}
-
-void /* PRIVATE */
-png_app_error(png_const_structrp png_ptr, png_const_charp error_message)
-{
-   if ((png_ptr->flags & PNG_FLAG_APP_ERRORS_WARN) != 0)
-      png_warning(png_ptr, error_message);
-   else
-      png_error(png_ptr, error_message);
-
-#  ifndef PNG_ERROR_TEXT_SUPPORTED
-      PNG_UNUSED(error_message)
-#  endif
-}
-#endif /* BENIGN_ERRORS */
-
-#define PNG_MAX_ERROR_TEXT 196 /* Currently limited by profile_error in png.c */
-#if defined(PNG_WARNINGS_SUPPORTED) || \
-   (defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED))
-/* These utilities are used internally to build an error message that relates
- * to the current chunk.  The chunk name comes from png_ptr->chunk_name,
- * which is used to prefix the message.  The message is limited in length
- * to 63 bytes. The name characters are output as hex digits wrapped in []
- * if the character is invalid.
- */
-#define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
-static const char png_digit[16] = {
-   '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
-   'A', 'B', 'C', 'D', 'E', 'F'
-};
-
-static void /* PRIVATE */
-png_format_buffer(png_const_structrp png_ptr, png_charp buffer, png_const_charp
-    error_message)
-{
-   png_uint_32 chunk_name = png_ptr->chunk_name;
-   int iout = 0, ishift = 24;
-
-   while (ishift >= 0)
-   {
-      int c = (int)(chunk_name >> ishift) & 0xff;
-
-      ishift -= 8;
-      if (isnonalpha(c) != 0)
-      {
-         buffer[iout++] = PNG_LITERAL_LEFT_SQUARE_BRACKET;
-         buffer[iout++] = png_digit[(c & 0xf0) >> 4];
-         buffer[iout++] = png_digit[c & 0x0f];
-         buffer[iout++] = PNG_LITERAL_RIGHT_SQUARE_BRACKET;
-      }
-
-      else
-      {
-         buffer[iout++] = (char)c;
-      }
-   }
-
-   if (error_message == NULL)
-      buffer[iout] = '\0';
-
-   else
-   {
-      int iin = 0;
-
-      buffer[iout++] = ':';
-      buffer[iout++] = ' ';
-
-      while (iin < PNG_MAX_ERROR_TEXT-1 && error_message[iin] != '\0')
-         buffer[iout++] = error_message[iin++];
-
-      /* iin < PNG_MAX_ERROR_TEXT, so the following is safe: */
-      buffer[iout] = '\0';
-   }
-}
-#endif /* WARNINGS || ERROR_TEXT */
-
-#if defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)
-PNG_FUNCTION(void,PNGAPI
-png_chunk_error,(png_const_structrp png_ptr, png_const_charp error_message),
-    PNG_NORETURN)
-{
-   char msg[18+PNG_MAX_ERROR_TEXT];
-   if (png_ptr == NULL)
-      png_error(png_ptr, error_message);
-
-   else
-   {
-      png_format_buffer(png_ptr, msg, error_message);
-      png_error(png_ptr, msg);
-   }
-}
-#endif /* READ && ERROR_TEXT */
-
-#ifdef PNG_WARNINGS_SUPPORTED
-void PNGAPI
-png_chunk_warning(png_const_structrp png_ptr, png_const_charp warning_message)
-{
-   char msg[18+PNG_MAX_ERROR_TEXT];
-   if (png_ptr == NULL)
-      png_warning(png_ptr, warning_message);
-
-   else
-   {
-      png_format_buffer(png_ptr, msg, warning_message);
-      png_warning(png_ptr, msg);
-   }
-}
-#endif /* WARNINGS */
-
-#ifdef PNG_READ_SUPPORTED
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-void PNGAPI
-png_chunk_benign_error(png_const_structrp png_ptr, png_const_charp
-    error_message)
-{
-   if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0)
-      png_chunk_warning(png_ptr, error_message);
-
-   else
-      png_chunk_error(png_ptr, error_message);
-
-#  ifndef PNG_ERROR_TEXT_SUPPORTED
-      PNG_UNUSED(error_message)
-#  endif
-}
-#endif
-#endif /* READ */
-
-void /* PRIVATE */
-png_chunk_report(png_const_structrp png_ptr, png_const_charp message, int error)
-{
-#  ifndef PNG_WARNINGS_SUPPORTED
-      PNG_UNUSED(message)
-#  endif
-
-   /* This is always supported, but for just read or just write it
-    * unconditionally does the right thing.
-    */
-#  if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED)
-      if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
-#  endif
-
-#  ifdef PNG_READ_SUPPORTED
-      {
-         if (error < PNG_CHUNK_ERROR)
-            png_chunk_warning(png_ptr, message);
-
-         else
-            png_chunk_benign_error(png_ptr, message);
-      }
-#  endif
-
-#  if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED)
-      else if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
-#  endif
-
-#  ifdef PNG_WRITE_SUPPORTED
-      {
-         if (error < PNG_CHUNK_WRITE_ERROR)
-            png_app_warning(png_ptr, message);
-
-         else
-            png_app_error(png_ptr, message);
-      }
-#  endif
-}
-
-#ifdef PNG_ERROR_TEXT_SUPPORTED
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-PNG_FUNCTION(void,
-png_fixed_error,(png_const_structrp png_ptr, png_const_charp name),PNG_NORETURN)
-{
-#  define fixed_message "fixed point overflow in "
-#  define fixed_message_ln ((sizeof fixed_message)-1)
-   unsigned int  iin;
-   char msg[fixed_message_ln+PNG_MAX_ERROR_TEXT];
-   memcpy(msg, fixed_message, fixed_message_ln);
-   iin = 0;
-   if (name != NULL)
-      while (iin < (PNG_MAX_ERROR_TEXT-1) && name[iin] != 0)
-      {
-         msg[fixed_message_ln + iin] = name[iin];
-         ++iin;
-      }
-   msg[fixed_message_ln + iin] = 0;
-   png_error(png_ptr, msg);
-}
-#endif
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-/* This API only exists if ANSI-C style error handling is used,
- * otherwise it is necessary for png_default_error to be overridden.
- */
-jmp_buf* PNGAPI
-png_set_longjmp_fn(png_structrp png_ptr, png_longjmp_ptr longjmp_fn,
-    size_t jmp_buf_size)
-{
-   /* From libpng 1.6.0 the app gets one chance to set a 'jmpbuf_size' value
-    * and it must not change after that.  Libpng doesn't care how big the
-    * buffer is, just that it doesn't change.
-    *
-    * If the buffer size is no *larger* than the size of jmp_buf when libpng is
-    * compiled a built in jmp_buf is returned; this preserves the pre-1.6.0
-    * semantics that this call will not fail.  If the size is larger, however,
-    * the buffer is allocated and this may fail, causing the function to return
-    * NULL.
-    */
-   if (png_ptr == NULL)
-      return NULL;
-
-   if (png_ptr->jmp_buf_ptr == NULL)
-   {
-      png_ptr->jmp_buf_size = 0; /* not allocated */
-
-      if (jmp_buf_size <= (sizeof png_ptr->jmp_buf_local))
-         png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local;
-
-      else
-      {
-         png_ptr->jmp_buf_ptr = png_voidcast(jmp_buf *,
-             png_malloc_warn(png_ptr, jmp_buf_size));
-
-         if (png_ptr->jmp_buf_ptr == NULL)
-            return NULL; /* new NULL return on OOM */
-
-         png_ptr->jmp_buf_size = jmp_buf_size;
-      }
-   }
-
-   else /* Already allocated: check the size */
-   {
-      size_t size = png_ptr->jmp_buf_size;
-
-      if (size == 0)
-      {
-         size = (sizeof png_ptr->jmp_buf_local);
-         if (png_ptr->jmp_buf_ptr != &png_ptr->jmp_buf_local)
-         {
-            /* This is an internal error in libpng: somehow we have been left
-             * with a stack allocated jmp_buf when the application regained
-             * control.  It's always possible to fix this up, but for the moment
-             * this is a png_error because that makes it easy to detect.
-             */
-            png_error(png_ptr, "Libpng jmp_buf still allocated");
-            /* png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local; */
-         }
-      }
-
-      if (size != jmp_buf_size)
-      {
-         png_warning(png_ptr, "Application jmp_buf size changed");
-         return NULL; /* caller will probably crash: no choice here */
-      }
-   }
-
-   /* Finally fill in the function, now we have a satisfactory buffer. It is
-    * valid to change the function on every call.
-    */
-   png_ptr->longjmp_fn = longjmp_fn;
-   return png_ptr->jmp_buf_ptr;
-}
-
-void /* PRIVATE */
-png_free_jmpbuf(png_structrp png_ptr)
-{
-   if (png_ptr != NULL)
-   {
-      jmp_buf *jb = png_ptr->jmp_buf_ptr;
-
-      /* A size of 0 is used to indicate a local, stack, allocation of the
-       * pointer; used here and in png.c
-       */
-      if (jb != NULL && png_ptr->jmp_buf_size > 0)
-      {
-
-         /* This stuff is so that a failure to free the error control structure
-          * does not leave libpng in a state with no valid error handling: the
-          * free always succeeds, if there is an error it gets ignored.
-          */
-         if (jb != &png_ptr->jmp_buf_local)
-         {
-            /* Make an internal, libpng, jmp_buf to return here */
-            jmp_buf free_jmp_buf;
-
-            if (!setjmp(free_jmp_buf))
-            {
-               png_ptr->jmp_buf_ptr = &free_jmp_buf; /* come back here */
-               png_ptr->jmp_buf_size = 0; /* stack allocation */
-               png_ptr->longjmp_fn = longjmp;
-               png_free(png_ptr, jb); /* Return to setjmp on error */
-            }
-         }
-      }
-
-      /* *Always* cancel everything out: */
-      png_ptr->jmp_buf_size = 0;
-      png_ptr->jmp_buf_ptr = NULL;
-      png_ptr->longjmp_fn = 0;
-   }
-}
-#endif
-
-/* This is the default error handling function.  Note that replacements for
- * this function MUST NOT RETURN, or the program will likely crash.  This
- * function is used by default, or if the program supplies NULL for the
- * error function pointer in png_set_error_fn().
- */
-static PNG_FUNCTION(void /* PRIVATE */,
-png_default_error,(png_const_structrp png_ptr, png_const_charp error_message),
-    PNG_NORETURN)
-{
-#ifdef PNG_CONSOLE_IO_SUPPORTED
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   /* Check on NULL only added in 1.5.4 */
-   if (error_message != NULL && *error_message == PNG_LITERAL_SHARP)
-   {
-      /* Strip "#nnnn " from beginning of error message. */
-      int offset;
-      char error_number[16];
-      for (offset = 0; offset<15; offset++)
-      {
-         error_number[offset] = error_message[offset + 1];
-         if (error_message[offset] == ' ')
-            break;
-      }
-
-      if ((offset > 1) && (offset < 15))
-      {
-         error_number[offset - 1] = '\0';
-         fprintf(stderr, "libpng error no. %s: %s",
-             error_number, error_message + offset + 1);
-         fprintf(stderr, PNG_STRING_NEWLINE);
-      }
-
-      else
-      {
-         fprintf(stderr, "libpng error: %s, offset=%d",
-             error_message, offset);
-         fprintf(stderr, PNG_STRING_NEWLINE);
-      }
-   }
-   else
-#endif
-   {
-      fprintf(stderr, "libpng error: %s", error_message ? error_message :
-         "undefined");
-      fprintf(stderr, PNG_STRING_NEWLINE);
-   }
-#else
-   PNG_UNUSED(error_message) /* Make compiler happy */
-#endif
-   png_longjmp(png_ptr, 1);
-}
-
-PNG_FUNCTION(void,PNGAPI
-png_longjmp,(png_const_structrp png_ptr, int val),PNG_NORETURN)
-{
-#ifdef PNG_SETJMP_SUPPORTED
-   if (png_ptr != NULL && png_ptr->longjmp_fn != NULL &&
-       png_ptr->jmp_buf_ptr != NULL)
-      png_ptr->longjmp_fn(*png_ptr->jmp_buf_ptr, val);
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(val)
-#endif
-
-   /* If control reaches this point, png_longjmp() must not return. The only
-    * choice is to terminate the whole process (or maybe the thread); to do
-    * this the ANSI-C abort() function is used unless a different method is
-    * implemented by overriding the default configuration setting for
-    * PNG_ABORT().
-    */
-   PNG_ABORT();
-}
-
-#ifdef PNG_WARNINGS_SUPPORTED
-/* This function is called when there is a warning, but the library thinks
- * it can continue anyway.  Replacement functions don't have to do anything
- * here if you don't want them to.  In the default configuration, png_ptr is
- * not used, but it is passed in case it may be useful.
- */
-static void /* PRIVATE */
-png_default_warning(png_const_structrp png_ptr, png_const_charp warning_message)
-{
-#ifdef PNG_CONSOLE_IO_SUPPORTED
-#  ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   if (*warning_message == PNG_LITERAL_SHARP)
-   {
-      int offset;
-      char warning_number[16];
-      for (offset = 0; offset < 15; offset++)
-      {
-         warning_number[offset] = warning_message[offset + 1];
-         if (warning_message[offset] == ' ')
-            break;
-      }
-
-      if ((offset > 1) && (offset < 15))
-      {
-         warning_number[offset + 1] = '\0';
-         fprintf(stderr, "libpng warning no. %s: %s",
-             warning_number, warning_message + offset);
-         fprintf(stderr, PNG_STRING_NEWLINE);
-      }
-
-      else
-      {
-         fprintf(stderr, "libpng warning: %s",
-             warning_message);
-         fprintf(stderr, PNG_STRING_NEWLINE);
-      }
-   }
-   else
-#  endif
-
-   {
-      fprintf(stderr, "libpng warning: %s", warning_message);
-      fprintf(stderr, PNG_STRING_NEWLINE);
-   }
-#else
-   PNG_UNUSED(warning_message) /* Make compiler happy */
-#endif
-   PNG_UNUSED(png_ptr) /* Make compiler happy */
-}
-#endif /* WARNINGS */
-
-/* This function is called when the application wants to use another method
- * of handling errors and warnings.  Note that the error function MUST NOT
- * return to the calling routine or serious problems will occur.  The return
- * method used in the default routine calls longjmp(png_ptr->jmp_buf_ptr, 1)
- */
-void PNGAPI
-png_set_error_fn(png_structrp png_ptr, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warning_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->error_ptr = error_ptr;
-   png_ptr->error_fn = error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_ptr->warning_fn = warning_fn;
-#else
-   PNG_UNUSED(warning_fn)
-#endif
-}
-
-
-/* This function returns a pointer to the error_ptr associated with the user
- * functions.  The application should free any memory associated with this
- * pointer before png_write_destroy and png_read_destroy are called.
- */
-png_voidp PNGAPI
-png_get_error_ptr(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return NULL;
-
-   return (png_voidp)png_ptr->error_ptr;
-}
-
-
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-void PNGAPI
-png_set_strip_error_numbers(png_structrp png_ptr, png_uint_32 strip_mode)
-{
-   if (png_ptr != NULL)
-   {
-      png_ptr->flags &=
-         ((~(PNG_FLAG_STRIP_ERROR_NUMBERS |
-         PNG_FLAG_STRIP_ERROR_TEXT))&strip_mode);
-   }
-}
-#endif
-
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
-   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-   /* Currently the above both depend on SETJMP_SUPPORTED, however it would be
-    * possible to implement without setjmp support just so long as there is some
-    * way to handle the error return here:
-    */
-PNG_FUNCTION(void /* PRIVATE */, (PNGCBAPI
-png_safe_error),(png_structp png_nonconst_ptr, png_const_charp error_message),
-    PNG_NORETURN)
-{
-   png_const_structrp png_ptr = png_nonconst_ptr;
-   png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr);
-
-   /* An error is always logged here, overwriting anything (typically a warning)
-    * that is already there:
-    */
-   if (image != NULL)
-   {
-      png_safecat(image->message, (sizeof image->message), 0, error_message);
-      image->warning_or_error |= PNG_IMAGE_ERROR;
-
-      /* Retrieve the jmp_buf from within the png_control, making this work for
-       * C++ compilation too is pretty tricky: C++ wants a pointer to the first
-       * element of a jmp_buf, but C doesn't tell us the type of that.
-       */
-      if (image->opaque != NULL && image->opaque->error_buf != NULL)
-         longjmp(png_control_jmp_buf(image->opaque), 1);
-
-      /* Missing longjmp buffer, the following is to help debugging: */
-      {
-         size_t pos = png_safecat(image->message, (sizeof image->message), 0,
-             "bad longjmp: ");
-         png_safecat(image->message, (sizeof image->message), pos,
-             error_message);
-      }
-   }
-
-   /* Here on an internal programming error. */
-   abort();
-}
-
-#ifdef PNG_WARNINGS_SUPPORTED
-void /* PRIVATE */ PNGCBAPI
-png_safe_warning(png_structp png_nonconst_ptr, png_const_charp warning_message)
-{
-   png_const_structrp png_ptr = png_nonconst_ptr;
-   png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr);
-
-   /* A warning is only logged if there is no prior warning or error. */
-   if (image->warning_or_error == 0)
-   {
-      png_safecat(image->message, (sizeof image->message), 0, warning_message);
-      image->warning_or_error |= PNG_IMAGE_WARNING;
-   }
-}
-#endif
-
-int /* PRIVATE */
-png_safe_execute(png_imagep image, int (*function)(png_voidp), png_voidp arg)
-{
-   png_voidp saved_error_buf = image->opaque->error_buf;
-   jmp_buf safe_jmpbuf;
-   int result;
-
-   /* Safely execute function(arg), with png_error returning back here. */
-   if (setjmp(safe_jmpbuf) == 0)
-   {
-      image->opaque->error_buf = safe_jmpbuf;
-      result = function(arg);
-      image->opaque->error_buf = saved_error_buf;
-      return result;
-   }
-
-   /* On png_error, return via longjmp, pop the jmpbuf, and free the image. */
-   image->opaque->error_buf = saved_error_buf;
-   png_image_free(image);
-   return 0;
-}
-#endif /* SIMPLIFIED READ || SIMPLIFIED_WRITE */
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pngget.c b/dep/libpng/src/pngget.c
deleted file mode 100644
index 1084b268f..000000000
--- a/dep/libpng/src/pngget.c
+++ /dev/null
@@ -1,1267 +0,0 @@
-
-/* pngget.c - retrieval of values from info struct
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-png_uint_32 PNGAPI
-png_get_valid(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_uint_32 flag)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-   {
-#ifdef PNG_READ_tRNS_SUPPORTED
-      /* png_handle_PLTE() may have canceled a valid tRNS chunk but left the
-       * 'valid' flag for the detection of duplicate chunks. Do not report a
-       * valid tRNS chunk in this case.
-       */
-      if (flag == PNG_INFO_tRNS && png_ptr->num_trans == 0)
-         return 0;
-#endif
-
-      return info_ptr->valid & flag;
-   }
-
-   return 0;
-}
-
-size_t PNGAPI
-png_get_rowbytes(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->rowbytes;
-
-   return 0;
-}
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-png_bytepp PNGAPI
-png_get_rows(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->row_pointers;
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_EASY_ACCESS_SUPPORTED
-/* Easy access to info, added in libpng-0.99 */
-png_uint_32 PNGAPI
-png_get_image_width(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->width;
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_image_height(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->height;
-
-   return 0;
-}
-
-png_byte PNGAPI
-png_get_bit_depth(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->bit_depth;
-
-   return 0;
-}
-
-png_byte PNGAPI
-png_get_color_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->color_type;
-
-   return 0;
-}
-
-png_byte PNGAPI
-png_get_filter_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->filter_type;
-
-   return 0;
-}
-
-png_byte PNGAPI
-png_get_interlace_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->interlace_type;
-
-   return 0;
-}
-
-png_byte PNGAPI
-png_get_compression_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->compression_type;
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_x_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp
-   info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   png_debug(1, "in png_get_x_pixels_per_meter");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
-         return info_ptr->x_pixels_per_unit;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_y_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp
-    info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   png_debug(1, "in png_get_y_pixels_per_meter");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
-         return info_ptr->y_pixels_per_unit;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   png_debug(1, "in png_get_pixels_per_meter");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER &&
-          info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit)
-         return info_ptr->x_pixels_per_unit;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_pixel_aspect_ratio(png_const_structrp png_ptr, png_const_inforp
-   info_ptr)
-{
-#ifdef PNG_READ_pHYs_SUPPORTED
-   png_debug(1, "in png_get_pixel_aspect_ratio");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (info_ptr->x_pixels_per_unit != 0)
-         return (float)info_ptr->y_pixels_per_unit
-              / (float)info_ptr->x_pixels_per_unit;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return (float)0.0;
-}
-#endif
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-png_fixed_point PNGAPI
-png_get_pixel_aspect_ratio_fixed(png_const_structrp png_ptr,
-    png_const_inforp info_ptr)
-{
-#ifdef PNG_READ_pHYs_SUPPORTED
-   png_debug(1, "in png_get_pixel_aspect_ratio_fixed");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0 &&
-       info_ptr->x_pixels_per_unit > 0 && info_ptr->y_pixels_per_unit > 0 &&
-       info_ptr->x_pixels_per_unit <= PNG_UINT_31_MAX &&
-       info_ptr->y_pixels_per_unit <= PNG_UINT_31_MAX)
-   {
-      png_fixed_point res;
-
-      /* The following casts work because a PNG 4 byte integer only has a valid
-       * range of 0..2^31-1; otherwise the cast might overflow.
-       */
-      if (png_muldiv(&res, (png_int_32)info_ptr->y_pixels_per_unit, PNG_FP_1,
-          (png_int_32)info_ptr->x_pixels_per_unit) != 0)
-         return res;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-#endif
-
-png_int_32 PNGAPI
-png_get_x_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   png_debug(1, "in png_get_x_offset_microns");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_oFFs) != 0)
-   {
-      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
-         return info_ptr->x_offset;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-png_int_32 PNGAPI
-png_get_y_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   png_debug(1, "in png_get_y_offset_microns");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_oFFs) != 0)
-   {
-      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
-         return info_ptr->y_offset;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-png_int_32 PNGAPI
-png_get_x_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   png_debug(1, "in png_get_x_offset_pixels");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_oFFs) != 0)
-   {
-      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
-         return info_ptr->x_offset;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-png_int_32 PNGAPI
-png_get_y_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   png_debug(1, "in png_get_y_offset_pixels");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_oFFs) != 0)
-   {
-      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
-         return info_ptr->y_offset;
-   }
-#else
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(info_ptr)
-#endif
-
-   return 0;
-}
-
-#ifdef PNG_INCH_CONVERSIONS_SUPPORTED
-static png_uint_32
-ppi_from_ppm(png_uint_32 ppm)
-{
-#if 0
-   /* The conversion is *(2.54/100), in binary (32 digits):
-    * .00000110100000001001110101001001
-    */
-   png_uint_32 t1001, t1101;
-   ppm >>= 1;                  /* .1 */
-   t1001 = ppm + (ppm >> 3);   /* .1001 */
-   t1101 = t1001 + (ppm >> 1); /* .1101 */
-   ppm >>= 20;                 /* .000000000000000000001 */
-   t1101 += t1101 >> 15;       /* .1101000000000001101 */
-   t1001 >>= 11;               /* .000000000001001 */
-   t1001 += t1001 >> 12;       /* .000000000001001000000001001 */
-   ppm += t1001;               /* .000000000001001000001001001 */
-   ppm += t1101;               /* .110100000001001110101001001 */
-   return (ppm + 16) >> 5;/* .00000110100000001001110101001001 */
-#else
-   /* The argument is a PNG unsigned integer, so it is not permitted
-    * to be bigger than 2^31.
-    */
-   png_fixed_point result;
-   if (ppm <= PNG_UINT_31_MAX && png_muldiv(&result, (png_int_32)ppm, 127,
-       5000) != 0)
-      return (png_uint_32)result;
-
-   /* Overflow. */
-   return 0;
-#endif
-}
-
-png_uint_32 PNGAPI
-png_get_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   return ppi_from_ppm(png_get_pixels_per_meter(png_ptr, info_ptr));
-}
-
-png_uint_32 PNGAPI
-png_get_x_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   return ppi_from_ppm(png_get_x_pixels_per_meter(png_ptr, info_ptr));
-}
-
-png_uint_32 PNGAPI
-png_get_y_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   return ppi_from_ppm(png_get_y_pixels_per_meter(png_ptr, info_ptr));
-}
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-static png_fixed_point
-png_fixed_inches_from_microns(png_const_structrp png_ptr, png_int_32 microns)
-{
-   /* Convert from meters * 1,000,000 to inches * 100,000, meters to
-    * inches is simply *(100/2.54), so we want *(10/2.54) == 500/127.
-    * Notice that this can overflow - a warning is output and 0 is
-    * returned.
-    */
-   return png_muldiv_warn(png_ptr, microns, 500, 127);
-}
-
-png_fixed_point PNGAPI
-png_get_x_offset_inches_fixed(png_const_structrp png_ptr,
-    png_const_inforp info_ptr)
-{
-   return png_fixed_inches_from_microns(png_ptr,
-       png_get_x_offset_microns(png_ptr, info_ptr));
-}
-#endif
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-png_fixed_point PNGAPI
-png_get_y_offset_inches_fixed(png_const_structrp png_ptr,
-    png_const_inforp info_ptr)
-{
-   return png_fixed_inches_from_microns(png_ptr,
-       png_get_y_offset_microns(png_ptr, info_ptr));
-}
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_x_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   /* To avoid the overflow do the conversion directly in floating
-    * point.
-    */
-   return (float)(png_get_x_offset_microns(png_ptr, info_ptr) * .00003937);
-}
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_y_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   /* To avoid the overflow do the conversion directly in floating
-    * point.
-    */
-   return (float)(png_get_y_offset_microns(png_ptr, info_ptr) * .00003937);
-}
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pHYs_dpi(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
-{
-   png_uint_32 retval = 0;
-
-   png_debug1(1, "in %s retrieval function", "pHYs");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (res_x != NULL)
-      {
-         *res_x = info_ptr->x_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (res_y != NULL)
-      {
-         *res_y = info_ptr->y_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (unit_type != NULL)
-      {
-         *unit_type = (int)info_ptr->phys_unit_type;
-         retval |= PNG_INFO_pHYs;
-
-         if (*unit_type == 1)
-         {
-            if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50);
-            if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50);
-         }
-      }
-   }
-
-   return retval;
-}
-#endif /* pHYs */
-#endif /* INCH_CONVERSIONS */
-
-/* png_get_channels really belongs in here, too, but it's been around longer */
-
-#endif /* EASY_ACCESS */
-
-
-png_byte PNGAPI
-png_get_channels(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->channels;
-
-   return 0;
-}
-
-#ifdef PNG_READ_SUPPORTED
-png_const_bytep PNGAPI
-png_get_signature(png_const_structrp png_ptr, png_const_inforp info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->signature;
-
-   return NULL;
-}
-#endif
-
-#ifdef PNG_bKGD_SUPPORTED
-png_uint_32 PNGAPI
-png_get_bKGD(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_color_16p *background)
-{
-   png_debug1(1, "in %s retrieval function", "bKGD");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_bKGD) != 0 &&
-       background != NULL)
-   {
-      *background = &(info_ptr->background);
-      return PNG_INFO_bKGD;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-/* The XYZ APIs were added in 1.5.5 to take advantage of the code added at the
- * same time to correct the rgb grayscale coefficient defaults obtained from the
- * cHRM chunk in 1.5.4
- */
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_cHRM(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    double *white_x, double *white_y, double *red_x, double *red_y,
-    double *green_x, double *green_y, double *blue_x, double *blue_y)
-{
-   png_debug1(1, "in %s retrieval function", "cHRM");
-
-   /* Quiet API change: this code used to only return the end points if a cHRM
-    * chunk was present, but the end points can also come from iCCP or sRGB
-    * chunks, so in 1.6.0 the png_get_ APIs return the end points regardless and
-    * the png_set_ APIs merely check that set end points are mutually
-    * consistent.
-    */
-   if (png_ptr != NULL && info_ptr != NULL &&
-      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      if (white_x != NULL)
-         *white_x = png_float(png_ptr,
-             info_ptr->colorspace.end_points_xy.whitex, "cHRM white X");
-      if (white_y != NULL)
-         *white_y = png_float(png_ptr,
-             info_ptr->colorspace.end_points_xy.whitey, "cHRM white Y");
-      if (red_x != NULL)
-         *red_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redx,
-             "cHRM red X");
-      if (red_y != NULL)
-         *red_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redy,
-             "cHRM red Y");
-      if (green_x != NULL)
-         *green_x = png_float(png_ptr,
-             info_ptr->colorspace.end_points_xy.greenx, "cHRM green X");
-      if (green_y != NULL)
-         *green_y = png_float(png_ptr,
-             info_ptr->colorspace.end_points_xy.greeny, "cHRM green Y");
-      if (blue_x != NULL)
-         *blue_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluex,
-             "cHRM blue X");
-      if (blue_y != NULL)
-         *blue_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluey,
-             "cHRM blue Y");
-      return PNG_INFO_cHRM;
-   }
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_cHRM_XYZ(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    double *red_X, double *red_Y, double *red_Z, double *green_X,
-    double *green_Y, double *green_Z, double *blue_X, double *blue_Y,
-    double *blue_Z)
-{
-   png_debug1(1, "in %s retrieval function", "cHRM_XYZ(float)");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      if (red_X != NULL)
-         *red_X = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_X,
-             "cHRM red X");
-      if (red_Y != NULL)
-         *red_Y = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Y,
-             "cHRM red Y");
-      if (red_Z != NULL)
-         *red_Z = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Z,
-             "cHRM red Z");
-      if (green_X != NULL)
-         *green_X = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.green_X, "cHRM green X");
-      if (green_Y != NULL)
-         *green_Y = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.green_Y, "cHRM green Y");
-      if (green_Z != NULL)
-         *green_Z = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.green_Z, "cHRM green Z");
-      if (blue_X != NULL)
-         *blue_X = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.blue_X, "cHRM blue X");
-      if (blue_Y != NULL)
-         *blue_Y = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.blue_Y, "cHRM blue Y");
-      if (blue_Z != NULL)
-         *blue_Z = png_float(png_ptr,
-             info_ptr->colorspace.end_points_XYZ.blue_Z, "cHRM blue Z");
-      return PNG_INFO_cHRM;
-   }
-
-   return 0;
-}
-#  endif
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
-    png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
-    png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
-    png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
-    png_fixed_point *int_blue_Z)
-{
-   png_debug1(1, "in %s retrieval function", "cHRM_XYZ");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      if (int_red_X != NULL)
-         *int_red_X = info_ptr->colorspace.end_points_XYZ.red_X;
-      if (int_red_Y != NULL)
-         *int_red_Y = info_ptr->colorspace.end_points_XYZ.red_Y;
-      if (int_red_Z != NULL)
-         *int_red_Z = info_ptr->colorspace.end_points_XYZ.red_Z;
-      if (int_green_X != NULL)
-         *int_green_X = info_ptr->colorspace.end_points_XYZ.green_X;
-      if (int_green_Y != NULL)
-         *int_green_Y = info_ptr->colorspace.end_points_XYZ.green_Y;
-      if (int_green_Z != NULL)
-         *int_green_Z = info_ptr->colorspace.end_points_XYZ.green_Z;
-      if (int_blue_X != NULL)
-         *int_blue_X = info_ptr->colorspace.end_points_XYZ.blue_X;
-      if (int_blue_Y != NULL)
-         *int_blue_Y = info_ptr->colorspace.end_points_XYZ.blue_Y;
-      if (int_blue_Z != NULL)
-         *int_blue_Z = info_ptr->colorspace.end_points_XYZ.blue_Z;
-      return PNG_INFO_cHRM;
-   }
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_cHRM_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
-    png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
-    png_fixed_point *blue_x, png_fixed_point *blue_y)
-{
-   png_debug1(1, "in %s retrieval function", "cHRM");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
-   {
-      if (white_x != NULL)
-         *white_x = info_ptr->colorspace.end_points_xy.whitex;
-      if (white_y != NULL)
-         *white_y = info_ptr->colorspace.end_points_xy.whitey;
-      if (red_x != NULL)
-         *red_x = info_ptr->colorspace.end_points_xy.redx;
-      if (red_y != NULL)
-         *red_y = info_ptr->colorspace.end_points_xy.redy;
-      if (green_x != NULL)
-         *green_x = info_ptr->colorspace.end_points_xy.greenx;
-      if (green_y != NULL)
-         *green_y = info_ptr->colorspace.end_points_xy.greeny;
-      if (blue_x != NULL)
-         *blue_x = info_ptr->colorspace.end_points_xy.bluex;
-      if (blue_y != NULL)
-         *blue_y = info_ptr->colorspace.end_points_xy.bluey;
-      return PNG_INFO_cHRM;
-   }
-
-   return 0;
-}
-#  endif
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_gAMA_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_fixed_point *file_gamma)
-{
-   png_debug1(1, "in %s retrieval function", "gAMA");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
-       file_gamma != NULL)
-   {
-      *file_gamma = info_ptr->colorspace.gamma;
-      return PNG_INFO_gAMA;
-   }
-
-   return 0;
-}
-#  endif
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_gAMA(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    double *file_gamma)
-{
-   png_debug1(1, "in %s retrieval function", "gAMA(float)");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
-      file_gamma != NULL)
-   {
-      *file_gamma = png_float(png_ptr, info_ptr->colorspace.gamma,
-          "png_get_gAMA");
-      return PNG_INFO_gAMA;
-   }
-
-   return 0;
-}
-#  endif
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sRGB(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    int *file_srgb_intent)
-{
-   png_debug1(1, "in %s retrieval function", "sRGB");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-      (info_ptr->valid & PNG_INFO_sRGB) != 0 && file_srgb_intent != NULL)
-   {
-      *file_srgb_intent = info_ptr->colorspace.rendering_intent;
-      return PNG_INFO_sRGB;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-png_uint_32 PNGAPI
-png_get_iCCP(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_charpp name, int *compression_type,
-    png_bytepp profile, png_uint_32 *proflen)
-{
-   png_debug1(1, "in %s retrieval function", "iCCP");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_iCCP) != 0 &&
-       name != NULL && profile != NULL && proflen != NULL)
-   {
-      *name = info_ptr->iccp_name;
-      *profile = info_ptr->iccp_profile;
-      *proflen = png_get_uint_32(info_ptr->iccp_profile);
-      /* This is somewhat irrelevant since the profile data returned has
-       * actually been uncompressed.
-       */
-      if (compression_type != NULL)
-         *compression_type = PNG_COMPRESSION_TYPE_BASE;
-      return PNG_INFO_iCCP;
-   }
-
-   return 0;
-
-}
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-int PNGAPI
-png_get_sPLT(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_sPLT_tpp spalettes)
-{
-   png_debug1(1, "in %s retrieval function", "sPLT");
-
-   if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL)
-   {
-      *spalettes = info_ptr->splt_palettes;
-      return info_ptr->splt_palettes_num;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_eXIf_SUPPORTED
-png_uint_32 PNGAPI
-png_get_eXIf(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_bytep *exif)
-{
-  png_warning(png_ptr, "png_get_eXIf does not work; use png_get_eXIf_1");
-  PNG_UNUSED(info_ptr)
-  PNG_UNUSED(exif)
-  return 0;
-}
-
-png_uint_32 PNGAPI
-png_get_eXIf_1(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_uint_32 *num_exif, png_bytep *exif)
-{
-   png_debug1(1, "in %s retrieval function", "eXIf");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_eXIf) != 0 && exif != NULL)
-   {
-      *num_exif = info_ptr->num_exif;
-      *exif = info_ptr->exif;
-      return PNG_INFO_eXIf;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-png_uint_32 PNGAPI
-png_get_hIST(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_uint_16p *hist)
-{
-   png_debug1(1, "in %s retrieval function", "hIST");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_hIST) != 0 && hist != NULL)
-   {
-      *hist = info_ptr->hist;
-      return PNG_INFO_hIST;
-   }
-
-   return 0;
-}
-#endif
-
-png_uint_32 PNGAPI
-png_get_IHDR(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_uint_32 *width, png_uint_32 *height, int *bit_depth,
-    int *color_type, int *interlace_type, int *compression_type,
-    int *filter_type)
-{
-   png_debug1(1, "in %s retrieval function", "IHDR");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return 0;
-
-   if (width != NULL)
-       *width = info_ptr->width;
-
-   if (height != NULL)
-       *height = info_ptr->height;
-
-   if (bit_depth != NULL)
-       *bit_depth = info_ptr->bit_depth;
-
-   if (color_type != NULL)
-       *color_type = info_ptr->color_type;
-
-   if (compression_type != NULL)
-      *compression_type = info_ptr->compression_type;
-
-   if (filter_type != NULL)
-      *filter_type = info_ptr->filter_type;
-
-   if (interlace_type != NULL)
-      *interlace_type = info_ptr->interlace_type;
-
-   /* This is redundant if we can be sure that the info_ptr values were all
-    * assigned in png_set_IHDR().  We do the check anyhow in case an
-    * application has ignored our advice not to mess with the members
-    * of info_ptr directly.
-    */
-   png_check_IHDR(png_ptr, info_ptr->width, info_ptr->height,
-       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
-       info_ptr->compression_type, info_ptr->filter_type);
-
-   return 1;
-}
-
-#ifdef PNG_oFFs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_oFFs(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)
-{
-   png_debug1(1, "in %s retrieval function", "oFFs");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_oFFs) != 0 &&
-       offset_x != NULL && offset_y != NULL && unit_type != NULL)
-   {
-      *offset_x = info_ptr->x_offset;
-      *offset_y = info_ptr->y_offset;
-      *unit_type = (int)info_ptr->offset_unit_type;
-      return PNG_INFO_oFFs;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pCAL(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams,
-    png_charp *units, png_charpp *params)
-{
-   png_debug1(1, "in %s retrieval function", "pCAL");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pCAL) != 0 &&
-       purpose != NULL && X0 != NULL && X1 != NULL && type != NULL &&
-       nparams != NULL && units != NULL && params != NULL)
-   {
-      *purpose = info_ptr->pcal_purpose;
-      *X0 = info_ptr->pcal_X0;
-      *X1 = info_ptr->pcal_X1;
-      *type = (int)info_ptr->pcal_type;
-      *nparams = (int)info_ptr->pcal_nparams;
-      *units = info_ptr->pcal_units;
-      *params = info_ptr->pcal_params;
-      return PNG_INFO_pCAL;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-#    if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \
-         defined(PNG_FLOATING_POINT_SUPPORTED)
-png_uint_32 PNGAPI
-png_get_sCAL_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    int *unit, png_fixed_point *width, png_fixed_point *height)
-{
-   png_debug1(1, "in %s retrieval function", "sCAL");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL) != 0)
-   {
-      *unit = info_ptr->scal_unit;
-      /*TODO: make this work without FP support; the API is currently eliminated
-       * if neither floating point APIs nor internal floating point arithmetic
-       * are enabled.
-       */
-      *width = png_fixed(png_ptr, atof(info_ptr->scal_s_width), "sCAL width");
-      *height = png_fixed(png_ptr, atof(info_ptr->scal_s_height),
-          "sCAL height");
-      return PNG_INFO_sCAL;
-   }
-
-   return 0;
-}
-#    endif /* FLOATING_ARITHMETIC */
-#  endif /* FIXED_POINT */
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sCAL(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    int *unit, double *width, double *height)
-{
-   png_debug1(1, "in %s retrieval function", "sCAL(float)");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL) != 0)
-   {
-      *unit = info_ptr->scal_unit;
-      *width = atof(info_ptr->scal_s_width);
-      *height = atof(info_ptr->scal_s_height);
-      return PNG_INFO_sCAL;
-   }
-
-   return 0;
-}
-#  endif /* FLOATING POINT */
-png_uint_32 PNGAPI
-png_get_sCAL_s(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    int *unit, png_charpp width, png_charpp height)
-{
-   png_debug1(1, "in %s retrieval function", "sCAL(str)");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL) != 0)
-   {
-      *unit = info_ptr->scal_unit;
-      *width = info_ptr->scal_s_width;
-      *height = info_ptr->scal_s_height;
-      return PNG_INFO_sCAL;
-   }
-
-   return 0;
-}
-#endif /* sCAL */
-
-#ifdef PNG_pHYs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pHYs(png_const_structrp png_ptr, png_const_inforp info_ptr,
-    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
-{
-   png_uint_32 retval = 0;
-
-   png_debug1(1, "in %s retrieval function", "pHYs");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      if (res_x != NULL)
-      {
-         *res_x = info_ptr->x_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (res_y != NULL)
-      {
-         *res_y = info_ptr->y_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (unit_type != NULL)
-      {
-         *unit_type = (int)info_ptr->phys_unit_type;
-         retval |= PNG_INFO_pHYs;
-      }
-   }
-
-   return retval;
-}
-#endif /* pHYs */
-
-png_uint_32 PNGAPI
-png_get_PLTE(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_colorp *palette, int *num_palette)
-{
-   png_debug1(1, "in %s retrieval function", "PLTE");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_PLTE) != 0 && palette != NULL)
-   {
-      *palette = info_ptr->palette;
-      *num_palette = info_ptr->num_palette;
-      png_debug1(3, "num_palette = %d", *num_palette);
-      return PNG_INFO_PLTE;
-   }
-
-   return 0;
-}
-
-#ifdef PNG_sBIT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sBIT(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_color_8p *sig_bit)
-{
-   png_debug1(1, "in %s retrieval function", "sBIT");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sBIT) != 0 && sig_bit != NULL)
-   {
-      *sig_bit = &(info_ptr->sig_bit);
-      return PNG_INFO_sBIT;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-int PNGAPI
-png_get_text(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_textp *text_ptr, int *num_text)
-{
-   if (png_ptr != NULL && info_ptr != NULL && info_ptr->num_text > 0)
-   {
-      png_debug1(1, "in text retrieval function, chunk typeid = 0x%lx",
-         (unsigned long)png_ptr->chunk_name);
-
-      if (text_ptr != NULL)
-         *text_ptr = info_ptr->text;
-
-      if (num_text != NULL)
-         *num_text = info_ptr->num_text;
-
-      return info_ptr->num_text;
-   }
-
-   if (num_text != NULL)
-      *num_text = 0;
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-png_uint_32 PNGAPI
-png_get_tIME(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_timep *mod_time)
-{
-   png_debug1(1, "in %s retrieval function", "tIME");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_tIME) != 0 && mod_time != NULL)
-   {
-      *mod_time = &(info_ptr->mod_time);
-      return PNG_INFO_tIME;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-png_uint_32 PNGAPI
-png_get_tRNS(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color)
-{
-   png_uint_32 retval = 0;
-
-   png_debug1(1, "in %s retrieval function", "tRNS");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_tRNS) != 0)
-   {
-      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (trans_alpha != NULL)
-         {
-            *trans_alpha = info_ptr->trans_alpha;
-            retval |= PNG_INFO_tRNS;
-         }
-
-         if (trans_color != NULL)
-            *trans_color = &(info_ptr->trans_color);
-      }
-
-      else /* if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) */
-      {
-         if (trans_color != NULL)
-         {
-            *trans_color = &(info_ptr->trans_color);
-            retval |= PNG_INFO_tRNS;
-         }
-
-         if (trans_alpha != NULL)
-            *trans_alpha = NULL;
-      }
-
-      if (num_trans != NULL)
-      {
-         *num_trans = info_ptr->num_trans;
-         retval |= PNG_INFO_tRNS;
-      }
-   }
-
-   return retval;
-}
-#endif
-
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-int PNGAPI
-png_get_unknown_chunks(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_unknown_chunkpp unknowns)
-{
-   if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL)
-   {
-      *unknowns = info_ptr->unknown_chunks;
-      return info_ptr->unknown_chunks_num;
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-png_byte PNGAPI
-png_get_rgb_to_gray_status(png_const_structrp png_ptr)
-{
-   return (png_byte)(png_ptr ? png_ptr->rgb_to_gray_status : 0);
-}
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-png_voidp PNGAPI
-png_get_user_chunk_ptr(png_const_structrp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_ptr : NULL);
-}
-#endif
-
-size_t PNGAPI
-png_get_compression_buffer_size(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return 0;
-
-#ifdef PNG_WRITE_SUPPORTED
-   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
-#endif
-   {
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-      return png_ptr->IDAT_read_size;
-#else
-      return PNG_IDAT_READ_SIZE;
-#endif
-   }
-
-#ifdef PNG_WRITE_SUPPORTED
-   else
-      return png_ptr->zbuffer_size;
-#endif
-}
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-/* These functions were added to libpng 1.2.6 and were enabled
- * by default in libpng-1.4.0 */
-png_uint_32 PNGAPI
-png_get_user_width_max(png_const_structrp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_width_max : 0);
-}
-
-png_uint_32 PNGAPI
-png_get_user_height_max(png_const_structrp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_height_max : 0);
-}
-
-/* This function was added to libpng 1.4.0 */
-png_uint_32 PNGAPI
-png_get_chunk_cache_max(png_const_structrp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_cache_max : 0);
-}
-
-/* This function was added to libpng 1.4.1 */
-png_alloc_size_t PNGAPI
-png_get_chunk_malloc_max(png_const_structrp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_malloc_max : 0);
-}
-#endif /* SET_USER_LIMITS */
-
-/* These functions were added to libpng 1.4.0 */
-#ifdef PNG_IO_STATE_SUPPORTED
-png_uint_32 PNGAPI
-png_get_io_state(png_const_structrp png_ptr)
-{
-   return png_ptr->io_state;
-}
-
-png_uint_32 PNGAPI
-png_get_io_chunk_type(png_const_structrp png_ptr)
-{
-   return png_ptr->chunk_name;
-}
-#endif /* IO_STATE */
-
-#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#  ifdef PNG_GET_PALETTE_MAX_SUPPORTED
-int PNGAPI
-png_get_palette_max(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return png_ptr->num_palette_max;
-
-   return -1;
-}
-#  endif
-#endif
-
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pnginfo.h b/dep/libpng/src/pnginfo.h
deleted file mode 100644
index 1f98dedc4..000000000
--- a/dep/libpng/src/pnginfo.h
+++ /dev/null
@@ -1,267 +0,0 @@
-
-/* pnginfo.h - header file for PNG reference library
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2013,2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
- /* png_info is a structure that holds the information in a PNG file so
- * that the application can find out the characteristics of the image.
- * If you are reading the file, this structure will tell you what is
- * in the PNG file.  If you are writing the file, fill in the information
- * you want to put into the PNG file, using png_set_*() functions, then
- * call png_write_info().
- *
- * The names chosen should be very close to the PNG specification, so
- * consult that document for information about the meaning of each field.
- *
- * With libpng < 0.95, it was only possible to directly set and read the
- * the values in the png_info_struct, which meant that the contents and
- * order of the values had to remain fixed.  With libpng 0.95 and later,
- * however, there are now functions that abstract the contents of
- * png_info_struct from the application, so this makes it easier to use
- * libpng with dynamic libraries, and even makes it possible to use
- * libraries that don't have all of the libpng ancillary chunk-handing
- * functionality.  In libpng-1.5.0 this was moved into a separate private
- * file that is not visible to applications.
- *
- * The following members may have allocated storage attached that should be
- * cleaned up before the structure is discarded: palette, trans, text,
- * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
- * splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
- * are automatically freed when the info structure is deallocated, if they were
- * allocated internally by libpng.  This behavior can be changed by means
- * of the png_data_freer() function.
- *
- * More allocation details: all the chunk-reading functions that
- * change these members go through the corresponding png_set_*
- * functions.  A function to clear these members is available: see
- * png_free_data().  The png_set_* functions do not depend on being
- * able to point info structure members to any of the storage they are
- * passed (they make their own copies), EXCEPT that the png_set_text
- * functions use the same storage passed to them in the text_ptr or
- * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
- * functions do not make their own copies.
- */
-#ifndef PNGINFO_H
-#define PNGINFO_H
-
-struct png_info_def
-{
-   /* The following are necessary for every PNG file */
-   png_uint_32 width;       /* width of image in pixels (from IHDR) */
-   png_uint_32 height;      /* height of image in pixels (from IHDR) */
-   png_uint_32 valid;       /* valid chunk data (see PNG_INFO_ below) */
-   size_t rowbytes;         /* bytes needed to hold an untransformed row */
-   png_colorp palette;      /* array of color values (valid & PNG_INFO_PLTE) */
-   png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */
-   png_uint_16 num_trans;   /* number of transparent palette color (tRNS) */
-   png_byte bit_depth;      /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */
-   png_byte color_type;     /* see PNG_COLOR_TYPE_ below (from IHDR) */
-   /* The following three should have been named *_method not *_type */
-   png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */
-   png_byte filter_type;    /* must be PNG_FILTER_TYPE_BASE (from IHDR) */
-   png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
-
-   /* The following are set by png_set_IHDR, called from the application on
-    * write, but the are never actually used by the write code.
-    */
-   png_byte channels;       /* number of data channels per pixel (1, 2, 3, 4) */
-   png_byte pixel_depth;    /* number of bits per pixel */
-   png_byte spare_byte;     /* to align the data, and for future use */
-
-#ifdef PNG_READ_SUPPORTED
-   /* This is never set during write */
-   png_byte signature[8];   /* magic bytes read by libpng from start of file */
-#endif
-
-   /* The rest of the data is optional.  If you are reading, check the
-    * valid field to see if the information in these are valid.  If you
-    * are writing, set the valid field to those chunks you want written,
-    * and initialize the appropriate fields below.
-    */
-
-#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
-   /* png_colorspace only contains 'flags' if neither GAMMA or COLORSPACE are
-    * defined.  When COLORSPACE is switched on all the colorspace-defining
-    * chunks should be enabled, when GAMMA is switched on all the gamma-defining
-    * chunks should be enabled.  If this is not done it becomes possible to read
-    * inconsistent PNG files and assign a probably incorrect interpretation to
-    * the information.  (In other words, by carefully choosing which chunks to
-    * recognize the system configuration can select an interpretation for PNG
-    * files containing ambiguous data and this will result in inconsistent
-    * behavior between different libpng builds!)
-    */
-   png_colorspace colorspace;
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   /* iCCP chunk data. */
-   png_charp iccp_name;     /* profile name */
-   png_bytep iccp_profile;  /* International Color Consortium profile data */
-   png_uint_32 iccp_proflen;  /* ICC profile data length */
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* The tEXt, and zTXt chunks contain human-readable textual data in
-    * uncompressed, compressed, and optionally compressed forms, respectively.
-    * The data in "text" is an array of pointers to uncompressed,
-    * null-terminated C strings. Each chunk has a keyword that describes the
-    * textual data contained in that chunk.  Keywords are not required to be
-    * unique, and the text string may be empty.  Any number of text chunks may
-    * be in an image.
-    */
-   int num_text; /* number of comments read or comments to write */
-   int max_text; /* current size of text array */
-   png_textp text; /* array of comments read or comments to write */
-#endif /* TEXT */
-
-#ifdef PNG_tIME_SUPPORTED
-   /* The tIME chunk holds the last time the displayed image data was
-    * modified.  See the png_time struct for the contents of this struct.
-    */
-   png_time mod_time;
-#endif
-
-#ifdef PNG_sBIT_SUPPORTED
-   /* The sBIT chunk specifies the number of significant high-order bits
-    * in the pixel data.  Values are in the range [1, bit_depth], and are
-    * only specified for the channels in the pixel data.  The contents of
-    * the low-order bits is not specified.  Data is valid if
-    * (valid & PNG_INFO_sBIT) is non-zero.
-    */
-   png_color_8 sig_bit; /* significant bits in color channels */
-#endif
-
-#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
-defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* The tRNS chunk supplies transparency data for paletted images and
-    * other image types that don't need a full alpha channel.  There are
-    * "num_trans" transparency values for a paletted image, stored in the
-    * same order as the palette colors, starting from index 0.  Values
-    * for the data are in the range [0, 255], ranging from fully transparent
-    * to fully opaque, respectively.  For non-paletted images, there is a
-    * single color specified that should be treated as fully transparent.
-    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
-    */
-   png_bytep trans_alpha;    /* alpha values for paletted image */
-   png_color_16 trans_color; /* transparent color for non-palette image */
-#endif
-
-#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* The bKGD chunk gives the suggested image background color if the
-    * display program does not have its own background color and the image
-    * is needs to composited onto a background before display.  The colors
-    * in "background" are normally in the same color space/depth as the
-    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
-    */
-   png_color_16 background;
-#endif
-
-#ifdef PNG_oFFs_SUPPORTED
-   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
-    * and downwards from the top-left corner of the display, page, or other
-    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
-    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
-    */
-   png_int_32 x_offset; /* x offset on page */
-   png_int_32 y_offset; /* y offset on page */
-   png_byte offset_unit_type; /* offset units type */
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-   /* The pHYs chunk gives the physical pixel density of the image for
-    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
-    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
-    */
-   png_uint_32 x_pixels_per_unit; /* horizontal pixel density */
-   png_uint_32 y_pixels_per_unit; /* vertical pixel density */
-   png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */
-#endif
-
-#ifdef PNG_eXIf_SUPPORTED
-   int num_exif;  /* Added at libpng-1.6.31 */
-   png_bytep exif;
-# ifdef PNG_READ_eXIf_SUPPORTED
-   png_bytep eXIf_buf;  /* Added at libpng-1.6.32 */
-# endif
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   /* The hIST chunk contains the relative frequency or importance of the
-    * various palette entries, so that a viewer can intelligently select a
-    * reduced-color palette, if required.  Data is an array of "num_palette"
-    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
-    * is non-zero.
-    */
-   png_uint_16p hist;
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   /* The pCAL chunk describes a transformation between the stored pixel
-    * values and original physical data values used to create the image.
-    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
-    * range given by [pcal_X0, pcal_X1], and are further transformed by a
-    * (possibly non-linear) transformation function given by "pcal_type"
-    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
-    * defines below, and the PNG-Group's PNG extensions document for a
-    * complete description of the transformations and how they should be
-    * implemented, and for a description of the ASCII parameter strings.
-    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
-    */
-   png_charp pcal_purpose;  /* pCAL chunk description string */
-   png_int_32 pcal_X0;      /* minimum value */
-   png_int_32 pcal_X1;      /* maximum value */
-   png_charp pcal_units;    /* Latin-1 string giving physical units */
-   png_charpp pcal_params;  /* ASCII strings containing parameter values */
-   png_byte pcal_type;      /* equation type (see PNG_EQUATION_ below) */
-   png_byte pcal_nparams;   /* number of parameters given in pcal_params */
-#endif
-
-/* New members added in libpng-1.0.6 */
-   png_uint_32 free_me;     /* flags items libpng is responsible for freeing */
-
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-   /* Storage for unknown chunks that the library doesn't recognize. */
-   png_unknown_chunkp unknown_chunks;
-
-   /* The type of this field is limited by the type of
-    * png_struct::user_chunk_cache_max, else overflow can occur.
-    */
-   int                unknown_chunks_num;
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   /* Data on sPLT chunks (there may be more than one). */
-   png_sPLT_tp splt_palettes;
-   int         splt_palettes_num; /* Match type returned by png_get API */
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-   /* The sCAL chunk describes the actual physical dimensions of the
-    * subject matter of the graphic.  The chunk contains a unit specification
-    * a byte value, and two ASCII strings representing floating-point
-    * values.  The values are width and height corresponding to one pixel
-    * in the image.  Data values are valid if (valid & PNG_INFO_sCAL) is
-    * non-zero.
-    */
-   png_byte scal_unit;         /* unit of physical scale */
-   png_charp scal_s_width;     /* string containing height */
-   png_charp scal_s_height;    /* string containing width */
-#endif
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS)
-      non-zero */
-   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
-   png_bytepp row_pointers;        /* the image bits */
-#endif
-
-};
-#endif /* PNGINFO_H */
diff --git a/dep/libpng/src/pngmem.c b/dep/libpng/src/pngmem.c
deleted file mode 100644
index 09ed9c1c9..000000000
--- a/dep/libpng/src/pngmem.c
+++ /dev/null
@@ -1,284 +0,0 @@
-
-/* pngmem.c - stub functions for memory allocation
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2014,2016 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file provides a location for all memory allocation.  Users who
- * need special memory handling are expected to supply replacement
- * functions for png_malloc() and png_free(), and to use
- * png_create_read_struct_2() and png_create_write_struct_2() to
- * identify the replacement functions.
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-/* Free a png_struct */
-void /* PRIVATE */
-png_destroy_png_struct(png_structrp png_ptr)
-{
-   if (png_ptr != NULL)
-   {
-      /* png_free might call png_error and may certainly call
-       * png_get_mem_ptr, so fake a temporary png_struct to support this.
-       */
-      png_struct dummy_struct = *png_ptr;
-      memset(png_ptr, 0, (sizeof *png_ptr));
-      png_free(&dummy_struct, png_ptr);
-
-#     ifdef PNG_SETJMP_SUPPORTED
-         /* We may have a jmp_buf left to deallocate. */
-         png_free_jmpbuf(&dummy_struct);
-#     endif
-   }
-}
-
-/* Allocate memory.  For reasonable files, size should never exceed
- * 64K.  However, zlib may allocate more than 64K if you don't tell
- * it not to.  See zconf.h and png.h for more information.  zlib does
- * need to allocate exactly 64K, so whatever you call here must
- * have the ability to do that.
- */
-PNG_FUNCTION(png_voidp,PNGAPI
-png_calloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-   ret = png_malloc(png_ptr, size);
-
-   if (ret != NULL)
-      memset(ret, 0, size);
-
-   return ret;
-}
-
-/* png_malloc_base, an internal function added at libpng 1.6.0, does the work of
- * allocating memory, taking into account limits and PNG_USER_MEM_SUPPORTED.
- * Checking and error handling must happen outside this routine; it returns NULL
- * if the allocation cannot be done (for any reason.)
- */
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_malloc_base,(png_const_structrp png_ptr, png_alloc_size_t size),
-    PNG_ALLOCATED)
-{
-   /* Moved to png_malloc_base from png_malloc_default in 1.6.0; the DOS
-    * allocators have also been removed in 1.6.0, so any 16-bit system now has
-    * to implement a user memory handler.  This checks to be sure it isn't
-    * called with big numbers.
-    */
-#ifndef PNG_USER_MEM_SUPPORTED
-   PNG_UNUSED(png_ptr)
-#endif
-
-   /* Some compilers complain that this is always true.  However, it
-    * can be false when integer overflow happens.
-    */
-   if (size > 0 && size <= PNG_SIZE_MAX
-#     ifdef PNG_MAX_MALLOC_64K
-         && size <= 65536U
-#     endif
-      )
-   {
-#ifdef PNG_USER_MEM_SUPPORTED
-      if (png_ptr != NULL && png_ptr->malloc_fn != NULL)
-         return png_ptr->malloc_fn(png_constcast(png_structrp,png_ptr), size);
-
-      else
-#endif
-         return malloc((size_t)size); /* checked for truncation above */
-   }
-
-   else
-      return NULL;
-}
-
-#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\
-   defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED)
-/* This is really here only to work round a spurious warning in GCC 4.6 and 4.7
- * that arises because of the checks in png_realloc_array that are repeated in
- * png_malloc_array.
- */
-static png_voidp
-png_malloc_array_checked(png_const_structrp png_ptr, int nelements,
-    size_t element_size)
-{
-   png_alloc_size_t req = (png_alloc_size_t)nelements; /* known to be > 0 */
-
-   if (req <= PNG_SIZE_MAX/element_size)
-      return png_malloc_base(png_ptr, req * element_size);
-
-   /* The failure case when the request is too large */
-   return NULL;
-}
-
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_malloc_array,(png_const_structrp png_ptr, int nelements,
-    size_t element_size),PNG_ALLOCATED)
-{
-   if (nelements <= 0 || element_size == 0)
-      png_error(png_ptr, "internal error: array alloc");
-
-   return png_malloc_array_checked(png_ptr, nelements, element_size);
-}
-
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_realloc_array,(png_const_structrp png_ptr, png_const_voidp old_array,
-    int old_elements, int add_elements, size_t element_size),PNG_ALLOCATED)
-{
-   /* These are internal errors: */
-   if (add_elements <= 0 || element_size == 0 || old_elements < 0 ||
-      (old_array == NULL && old_elements > 0))
-      png_error(png_ptr, "internal error: array realloc");
-
-   /* Check for overflow on the elements count (so the caller does not have to
-    * check.)
-    */
-   if (add_elements <= INT_MAX - old_elements)
-   {
-      png_voidp new_array = png_malloc_array_checked(png_ptr,
-          old_elements+add_elements, element_size);
-
-      if (new_array != NULL)
-      {
-         /* Because png_malloc_array worked the size calculations below cannot
-          * overflow.
-          */
-         if (old_elements > 0)
-            memcpy(new_array, old_array, element_size*(unsigned)old_elements);
-
-         memset((char*)new_array + element_size*(unsigned)old_elements, 0,
-             element_size*(unsigned)add_elements);
-
-         return new_array;
-      }
-   }
-
-   return NULL; /* error */
-}
-#endif /* TEXT || sPLT || STORE_UNKNOWN_CHUNKS */
-
-/* Various functions that have different error handling are derived from this.
- * png_malloc always exists, but if PNG_USER_MEM_SUPPORTED is defined a separate
- * function png_malloc_default is also provided.
- */
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-   if (png_ptr == NULL)
-      return NULL;
-
-   ret = png_malloc_base(png_ptr, size);
-
-   if (ret == NULL)
-       png_error(png_ptr, "Out of memory"); /* 'm' means png_malloc */
-
-   return ret;
-}
-
-#ifdef PNG_USER_MEM_SUPPORTED
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc_default,(png_const_structrp png_ptr, png_alloc_size_t size),
-    PNG_ALLOCATED PNG_DEPRECATED)
-{
-   png_voidp ret;
-
-   if (png_ptr == NULL)
-      return NULL;
-
-   /* Passing 'NULL' here bypasses the application provided memory handler. */
-   ret = png_malloc_base(NULL/*use malloc*/, size);
-
-   if (ret == NULL)
-      png_error(png_ptr, "Out of Memory"); /* 'M' means png_malloc_default */
-
-   return ret;
-}
-#endif /* USER_MEM */
-
-/* This function was added at libpng version 1.2.3.  The png_malloc_warn()
- * function will issue a png_warning and return NULL instead of issuing a
- * png_error, if it fails to allocate the requested memory.
- */
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc_warn,(png_const_structrp png_ptr, png_alloc_size_t size),
-    PNG_ALLOCATED)
-{
-   if (png_ptr != NULL)
-   {
-      png_voidp ret = png_malloc_base(png_ptr, size);
-
-      if (ret != NULL)
-         return ret;
-
-      png_warning(png_ptr, "Out of memory");
-   }
-
-   return NULL;
-}
-
-/* Free a pointer allocated by png_malloc().  If ptr is NULL, return
- * without taking any action.
- */
-void PNGAPI
-png_free(png_const_structrp png_ptr, png_voidp ptr)
-{
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr->free_fn != NULL)
-      png_ptr->free_fn(png_constcast(png_structrp,png_ptr), ptr);
-
-   else
-      png_free_default(png_ptr, ptr);
-}
-
-PNG_FUNCTION(void,PNGAPI
-png_free_default,(png_const_structrp png_ptr, png_voidp ptr),PNG_DEPRECATED)
-{
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-#endif /* USER_MEM */
-
-   free(ptr);
-}
-
-#ifdef PNG_USER_MEM_SUPPORTED
-/* This function is called when the application wants to use another method
- * of allocating and freeing memory.
- */
-void PNGAPI
-png_set_mem_fn(png_structrp png_ptr, png_voidp mem_ptr, png_malloc_ptr
-  malloc_fn, png_free_ptr free_fn)
-{
-   if (png_ptr != NULL)
-   {
-      png_ptr->mem_ptr = mem_ptr;
-      png_ptr->malloc_fn = malloc_fn;
-      png_ptr->free_fn = free_fn;
-   }
-}
-
-/* This function returns a pointer to the mem_ptr associated with the user
- * functions.  The application should free any memory associated with this
- * pointer before png_write_destroy and png_read_destroy are called.
- */
-png_voidp PNGAPI
-png_get_mem_ptr(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return NULL;
-
-   return png_ptr->mem_ptr;
-}
-#endif /* USER_MEM */
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pngpread.c b/dep/libpng/src/pngpread.c
deleted file mode 100644
index ffab19c08..000000000
--- a/dep/libpng/src/pngpread.c
+++ /dev/null
@@ -1,1104 +0,0 @@
-
-/* pngpread.c - read a png file in push mode
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-
-/* Push model modes */
-#define PNG_READ_SIG_MODE   0
-#define PNG_READ_CHUNK_MODE 1
-#define PNG_READ_IDAT_MODE  2
-#define PNG_READ_tEXt_MODE  4
-#define PNG_READ_zTXt_MODE  5
-#define PNG_READ_DONE_MODE  6
-#define PNG_READ_iTXt_MODE  7
-#define PNG_ERROR_MODE      8
-
-#define PNG_PUSH_SAVE_BUFFER_IF_FULL \
-if (png_ptr->push_length + 4 > png_ptr->buffer_size) \
-   { png_push_save_buffer(png_ptr); return; }
-#define PNG_PUSH_SAVE_BUFFER_IF_LT(N) \
-if (png_ptr->buffer_size < N) \
-   { png_push_save_buffer(png_ptr); return; }
-
-void PNGAPI
-png_process_data(png_structrp png_ptr, png_inforp info_ptr,
-    png_bytep buffer, size_t buffer_size)
-{
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_push_restore_buffer(png_ptr, buffer, buffer_size);
-
-   while (png_ptr->buffer_size)
-   {
-      png_process_some_data(png_ptr, info_ptr);
-   }
-}
-
-size_t PNGAPI
-png_process_data_pause(png_structrp png_ptr, int save)
-{
-   if (png_ptr != NULL)
-   {
-      /* It's easiest for the caller if we do the save; then the caller doesn't
-       * have to supply the same data again:
-       */
-      if (save != 0)
-         png_push_save_buffer(png_ptr);
-      else
-      {
-         /* This includes any pending saved bytes: */
-         size_t remaining = png_ptr->buffer_size;
-         png_ptr->buffer_size = 0;
-
-         /* So subtract the saved buffer size, unless all the data
-          * is actually 'saved', in which case we just return 0
-          */
-         if (png_ptr->save_buffer_size < remaining)
-            return remaining - png_ptr->save_buffer_size;
-      }
-   }
-
-   return 0;
-}
-
-png_uint_32 PNGAPI
-png_process_data_skip(png_structrp png_ptr)
-{
-/* TODO: Deprecate and remove this API.
- * Somewhere the implementation of this seems to have been lost,
- * or abandoned.  It was only to support some internal back-door access
- * to png_struct) in libpng-1.4.x.
- */
-   png_app_warning(png_ptr,
-"png_process_data_skip is not implemented in any current version of libpng");
-   return 0;
-}
-
-/* What we do with the incoming data depends on what we were previously
- * doing before we ran out of data...
- */
-void /* PRIVATE */
-png_process_some_data(png_structrp png_ptr, png_inforp info_ptr)
-{
-   if (png_ptr == NULL)
-      return;
-
-   switch (png_ptr->process_mode)
-   {
-      case PNG_READ_SIG_MODE:
-      {
-         png_push_read_sig(png_ptr, info_ptr);
-         break;
-      }
-
-      case PNG_READ_CHUNK_MODE:
-      {
-         png_push_read_chunk(png_ptr, info_ptr);
-         break;
-      }
-
-      case PNG_READ_IDAT_MODE:
-      {
-         png_push_read_IDAT(png_ptr);
-         break;
-      }
-
-      default:
-      {
-         png_ptr->buffer_size = 0;
-         break;
-      }
-   }
-}
-
-/* Read any remaining signature bytes from the stream and compare them with
- * the correct PNG signature.  It is possible that this routine is called
- * with bytes already read from the signature, either because they have been
- * checked by the calling application, or because of multiple calls to this
- * routine.
- */
-void /* PRIVATE */
-png_push_read_sig(png_structrp png_ptr, png_inforp info_ptr)
-{
-   size_t num_checked = png_ptr->sig_bytes; /* SAFE, does not exceed 8 */
-   size_t num_to_check = 8 - num_checked;
-
-   if (png_ptr->buffer_size < num_to_check)
-   {
-      num_to_check = png_ptr->buffer_size;
-   }
-
-   png_push_fill_buffer(png_ptr, &(info_ptr->signature[num_checked]),
-       num_to_check);
-   png_ptr->sig_bytes = (png_byte)(png_ptr->sig_bytes + num_to_check);
-
-   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0)
-   {
-      if (num_checked < 4 &&
-          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0)
-         png_error(png_ptr, "Not a PNG file");
-
-      else
-         png_error(png_ptr, "PNG file corrupted by ASCII conversion");
-   }
-   else
-   {
-      if (png_ptr->sig_bytes >= 8)
-      {
-         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
-      }
-   }
-}
-
-void /* PRIVATE */
-png_push_read_chunk(png_structrp png_ptr, png_inforp info_ptr)
-{
-   png_uint_32 chunk_name;
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   int keep; /* unknown handling method */
-#endif
-
-   /* First we make sure we have enough data for the 4-byte chunk name
-    * and the 4-byte chunk length before proceeding with decoding the
-    * chunk data.  To fully decode each of these chunks, we also make
-    * sure we have enough data in the buffer for the 4-byte CRC at the
-    * end of every chunk (except IDAT, which is handled separately).
-    */
-   if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0)
-   {
-      png_byte chunk_length[4];
-      png_byte chunk_tag[4];
-
-      PNG_PUSH_SAVE_BUFFER_IF_LT(8)
-      png_push_fill_buffer(png_ptr, chunk_length, 4);
-      png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length);
-      png_reset_crc(png_ptr);
-      png_crc_read(png_ptr, chunk_tag, 4);
-      png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(chunk_tag);
-      png_check_chunk_name(png_ptr, png_ptr->chunk_name);
-      png_check_chunk_length(png_ptr, png_ptr->push_length);
-      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
-   }
-
-   chunk_name = png_ptr->chunk_name;
-
-   if (chunk_name == png_IDAT)
-   {
-      if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
-         png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
-
-      /* If we reach an IDAT chunk, this means we have read all of the
-       * header chunks, and we can start reading the image (or if this
-       * is called after the image has been read - we have an error).
-       */
-      if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-         png_error(png_ptr, "Missing IHDR before IDAT");
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-          (png_ptr->mode & PNG_HAVE_PLTE) == 0)
-         png_error(png_ptr, "Missing PLTE before IDAT");
-
-      png_ptr->process_mode = PNG_READ_IDAT_MODE;
-
-      if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-         if ((png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) == 0)
-            if (png_ptr->push_length == 0)
-               return;
-
-      png_ptr->mode |= PNG_HAVE_IDAT;
-
-      if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
-         png_benign_error(png_ptr, "Too many IDATs found");
-   }
-
-   if (chunk_name == png_IHDR)
-   {
-      if (png_ptr->push_length != 13)
-         png_error(png_ptr, "Invalid IHDR length");
-
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_IHDR(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-   else if (chunk_name == png_IEND)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_IEND(png_ptr, info_ptr, png_ptr->push_length);
-
-      png_ptr->process_mode = PNG_READ_DONE_MODE;
-      png_push_have_end(png_ptr, info_ptr);
-   }
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length, keep);
-
-      if (chunk_name == png_PLTE)
-         png_ptr->mode |= PNG_HAVE_PLTE;
-   }
-#endif
-
-   else if (chunk_name == png_PLTE)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_PLTE(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-   else if (chunk_name == png_IDAT)
-   {
-      png_ptr->idat_size = png_ptr->push_length;
-      png_ptr->process_mode = PNG_READ_IDAT_MODE;
-      png_push_have_info(png_ptr, info_ptr);
-      png_ptr->zstream.avail_out =
-          (uInt) PNG_ROWBYTES(png_ptr->pixel_depth,
-          png_ptr->iwidth) + 1;
-      png_ptr->zstream.next_out = png_ptr->row_buf;
-      return;
-   }
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-   else if (png_ptr->chunk_name == png_gAMA)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_gAMA(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_sBIT_SUPPORTED
-   else if (png_ptr->chunk_name == png_sBIT)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_sBIT(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_cHRM_SUPPORTED
-   else if (png_ptr->chunk_name == png_cHRM)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_cHRM(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_eXIf_SUPPORTED
-   else if (png_ptr->chunk_name == png_eXIf)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_eXIf(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_sRGB_SUPPORTED
-   else if (chunk_name == png_sRGB)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_iCCP_SUPPORTED
-   else if (png_ptr->chunk_name == png_iCCP)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_sPLT_SUPPORTED
-   else if (chunk_name == png_sPLT)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_tRNS_SUPPORTED
-   else if (chunk_name == png_tRNS)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_tRNS(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_bKGD_SUPPORTED
-   else if (chunk_name == png_bKGD)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_bKGD(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_hIST_SUPPORTED
-   else if (chunk_name == png_hIST)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_hIST(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_pHYs_SUPPORTED
-   else if (chunk_name == png_pHYs)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_pHYs(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_oFFs_SUPPORTED
-   else if (chunk_name == png_oFFs)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_oFFs(png_ptr, info_ptr, png_ptr->push_length);
-   }
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-   else if (chunk_name == png_pCAL)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_sCAL_SUPPORTED
-   else if (chunk_name == png_sCAL)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_tIME_SUPPORTED
-   else if (chunk_name == png_tIME)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_tIME(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_tEXt_SUPPORTED
-   else if (chunk_name == png_tEXt)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_tEXt(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_zTXt_SUPPORTED
-   else if (chunk_name == png_zTXt)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_zTXt(png_ptr, info_ptr, png_ptr->push_length);
-   }
-
-#endif
-#ifdef PNG_READ_iTXt_SUPPORTED
-   else if (chunk_name == png_iTXt)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length);
-   }
-#endif
-
-   else
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_FULL
-      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length,
-          PNG_HANDLE_CHUNK_AS_DEFAULT);
-   }
-
-   png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
-}
-
-void PNGCBAPI
-png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, size_t length)
-{
-   png_bytep ptr;
-
-   if (png_ptr == NULL)
-      return;
-
-   ptr = buffer;
-   if (png_ptr->save_buffer_size != 0)
-   {
-      size_t save_size;
-
-      if (length < png_ptr->save_buffer_size)
-         save_size = length;
-
-      else
-         save_size = png_ptr->save_buffer_size;
-
-      memcpy(ptr, png_ptr->save_buffer_ptr, save_size);
-      length -= save_size;
-      ptr += save_size;
-      png_ptr->buffer_size -= save_size;
-      png_ptr->save_buffer_size -= save_size;
-      png_ptr->save_buffer_ptr += save_size;
-   }
-   if (length != 0 && png_ptr->current_buffer_size != 0)
-   {
-      size_t save_size;
-
-      if (length < png_ptr->current_buffer_size)
-         save_size = length;
-
-      else
-         save_size = png_ptr->current_buffer_size;
-
-      memcpy(ptr, png_ptr->current_buffer_ptr, save_size);
-      png_ptr->buffer_size -= save_size;
-      png_ptr->current_buffer_size -= save_size;
-      png_ptr->current_buffer_ptr += save_size;
-   }
-}
-
-void /* PRIVATE */
-png_push_save_buffer(png_structrp png_ptr)
-{
-   if (png_ptr->save_buffer_size != 0)
-   {
-      if (png_ptr->save_buffer_ptr != png_ptr->save_buffer)
-      {
-         size_t i, istop;
-         png_bytep sp;
-         png_bytep dp;
-
-         istop = png_ptr->save_buffer_size;
-         for (i = 0, sp = png_ptr->save_buffer_ptr, dp = png_ptr->save_buffer;
-             i < istop; i++, sp++, dp++)
-         {
-            *dp = *sp;
-         }
-      }
-   }
-   if (png_ptr->save_buffer_size + png_ptr->current_buffer_size >
-       png_ptr->save_buffer_max)
-   {
-      size_t new_max;
-      png_bytep old_buffer;
-
-      if (png_ptr->save_buffer_size > PNG_SIZE_MAX -
-          (png_ptr->current_buffer_size + 256))
-      {
-         png_error(png_ptr, "Potential overflow of save_buffer");
-      }
-
-      new_max = png_ptr->save_buffer_size + png_ptr->current_buffer_size + 256;
-      old_buffer = png_ptr->save_buffer;
-      png_ptr->save_buffer = (png_bytep)png_malloc_warn(png_ptr,
-          (size_t)new_max);
-
-      if (png_ptr->save_buffer == NULL)
-      {
-         png_free(png_ptr, old_buffer);
-         png_error(png_ptr, "Insufficient memory for save_buffer");
-      }
-
-      if (old_buffer)
-         memcpy(png_ptr->save_buffer, old_buffer, png_ptr->save_buffer_size);
-      else if (png_ptr->save_buffer_size)
-         png_error(png_ptr, "save_buffer error");
-      png_free(png_ptr, old_buffer);
-      png_ptr->save_buffer_max = new_max;
-   }
-   if (png_ptr->current_buffer_size)
-   {
-      memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size,
-         png_ptr->current_buffer_ptr, png_ptr->current_buffer_size);
-      png_ptr->save_buffer_size += png_ptr->current_buffer_size;
-      png_ptr->current_buffer_size = 0;
-   }
-   png_ptr->save_buffer_ptr = png_ptr->save_buffer;
-   png_ptr->buffer_size = 0;
-}
-
-void /* PRIVATE */
-png_push_restore_buffer(png_structrp png_ptr, png_bytep buffer,
-    size_t buffer_length)
-{
-   png_ptr->current_buffer = buffer;
-   png_ptr->current_buffer_size = buffer_length;
-   png_ptr->buffer_size = buffer_length + png_ptr->save_buffer_size;
-   png_ptr->current_buffer_ptr = png_ptr->current_buffer;
-}
-
-void /* PRIVATE */
-png_push_read_IDAT(png_structrp png_ptr)
-{
-   if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0)
-   {
-      png_byte chunk_length[4];
-      png_byte chunk_tag[4];
-
-      /* TODO: this code can be commoned up with the same code in push_read */
-      PNG_PUSH_SAVE_BUFFER_IF_LT(8)
-      png_push_fill_buffer(png_ptr, chunk_length, 4);
-      png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length);
-      png_reset_crc(png_ptr);
-      png_crc_read(png_ptr, chunk_tag, 4);
-      png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(chunk_tag);
-      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
-
-      if (png_ptr->chunk_name != png_IDAT)
-      {
-         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
-
-         if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
-            png_error(png_ptr, "Not enough compressed data");
-
-         return;
-      }
-
-      png_ptr->idat_size = png_ptr->push_length;
-   }
-
-   if (png_ptr->idat_size != 0 && png_ptr->save_buffer_size != 0)
-   {
-      size_t save_size = png_ptr->save_buffer_size;
-      png_uint_32 idat_size = png_ptr->idat_size;
-
-      /* We want the smaller of 'idat_size' and 'current_buffer_size', but they
-       * are of different types and we don't know which variable has the fewest
-       * bits.  Carefully select the smaller and cast it to the type of the
-       * larger - this cannot overflow.  Do not cast in the following test - it
-       * will break on either 16-bit or 64-bit platforms.
-       */
-      if (idat_size < save_size)
-         save_size = (size_t)idat_size;
-
-      else
-         idat_size = (png_uint_32)save_size;
-
-      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size);
-
-      png_process_IDAT_data(png_ptr, png_ptr->save_buffer_ptr, save_size);
-
-      png_ptr->idat_size -= idat_size;
-      png_ptr->buffer_size -= save_size;
-      png_ptr->save_buffer_size -= save_size;
-      png_ptr->save_buffer_ptr += save_size;
-   }
-
-   if (png_ptr->idat_size != 0 && png_ptr->current_buffer_size != 0)
-   {
-      size_t save_size = png_ptr->current_buffer_size;
-      png_uint_32 idat_size = png_ptr->idat_size;
-
-      /* We want the smaller of 'idat_size' and 'current_buffer_size', but they
-       * are of different types and we don't know which variable has the fewest
-       * bits.  Carefully select the smaller and cast it to the type of the
-       * larger - this cannot overflow.
-       */
-      if (idat_size < save_size)
-         save_size = (size_t)idat_size;
-
-      else
-         idat_size = (png_uint_32)save_size;
-
-      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size);
-
-      png_process_IDAT_data(png_ptr, png_ptr->current_buffer_ptr, save_size);
-
-      png_ptr->idat_size -= idat_size;
-      png_ptr->buffer_size -= save_size;
-      png_ptr->current_buffer_size -= save_size;
-      png_ptr->current_buffer_ptr += save_size;
-   }
-
-   if (png_ptr->idat_size == 0)
-   {
-      PNG_PUSH_SAVE_BUFFER_IF_LT(4)
-      png_crc_finish(png_ptr, 0);
-      png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
-      png_ptr->mode |= PNG_AFTER_IDAT;
-      png_ptr->zowner = 0;
-   }
-}
-
-void /* PRIVATE */
-png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
-    size_t buffer_length)
-{
-   /* The caller checks for a non-zero buffer length. */
-   if (!(buffer_length > 0) || buffer == NULL)
-      png_error(png_ptr, "No IDAT data (internal error)");
-
-   /* This routine must process all the data it has been given
-    * before returning, calling the row callback as required to
-    * handle the uncompressed results.
-    */
-   png_ptr->zstream.next_in = buffer;
-   /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
-   png_ptr->zstream.avail_in = (uInt)buffer_length;
-
-   /* Keep going until the decompressed data is all processed
-    * or the stream marked as finished.
-    */
-   while (png_ptr->zstream.avail_in > 0 &&
-      (png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
-   {
-      int ret;
-
-      /* We have data for zlib, but we must check that zlib
-       * has someplace to put the results.  It doesn't matter
-       * if we don't expect any results -- it may be the input
-       * data is just the LZ end code.
-       */
-      if (!(png_ptr->zstream.avail_out > 0))
-      {
-         /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
-         png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
-             png_ptr->iwidth) + 1);
-
-         png_ptr->zstream.next_out = png_ptr->row_buf;
-      }
-
-      /* Using Z_SYNC_FLUSH here means that an unterminated
-       * LZ stream (a stream with a missing end code) can still
-       * be handled, otherwise (Z_NO_FLUSH) a future zlib
-       * implementation might defer output and therefore
-       * change the current behavior (see comments in inflate.c
-       * for why this doesn't happen at present with zlib 1.2.5).
-       */
-      ret = PNG_INFLATE(png_ptr, Z_SYNC_FLUSH);
-
-      /* Check for any failure before proceeding. */
-      if (ret != Z_OK && ret != Z_STREAM_END)
-      {
-         /* Terminate the decompression. */
-         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
-         png_ptr->zowner = 0;
-
-         /* This may be a truncated stream (missing or
-          * damaged end code).  Treat that as a warning.
-          */
-         if (png_ptr->row_number >= png_ptr->num_rows ||
-             png_ptr->pass > 6)
-            png_warning(png_ptr, "Truncated compressed data in IDAT");
-
-         else
-         {
-            if (ret == Z_DATA_ERROR)
-               png_benign_error(png_ptr, "IDAT: ADLER32 checksum mismatch");
-            else
-               png_error(png_ptr, "Decompression error in IDAT");
-         }
-
-         /* Skip the check on unprocessed input */
-         return;
-      }
-
-      /* Did inflate output any data? */
-      if (png_ptr->zstream.next_out != png_ptr->row_buf)
-      {
-         /* Is this unexpected data after the last row?
-          * If it is, artificially terminate the LZ output
-          * here.
-          */
-         if (png_ptr->row_number >= png_ptr->num_rows ||
-             png_ptr->pass > 6)
-         {
-            /* Extra data. */
-            png_warning(png_ptr, "Extra compressed data in IDAT");
-            png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
-            png_ptr->zowner = 0;
-
-            /* Do no more processing; skip the unprocessed
-             * input check below.
-             */
-            return;
-         }
-
-         /* Do we have a complete row? */
-         if (png_ptr->zstream.avail_out == 0)
-            png_push_process_row(png_ptr);
-      }
-
-      /* And check for the end of the stream. */
-      if (ret == Z_STREAM_END)
-         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
-   }
-
-   /* All the data should have been processed, if anything
-    * is left at this point we have bytes of IDAT data
-    * after the zlib end code.
-    */
-   if (png_ptr->zstream.avail_in > 0)
-      png_warning(png_ptr, "Extra compression data in IDAT");
-}
-
-void /* PRIVATE */
-png_push_process_row(png_structrp png_ptr)
-{
-   /* 1.5.6: row_info moved out of png_struct to a local here. */
-   png_row_info row_info;
-
-   row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */
-   row_info.color_type = png_ptr->color_type;
-   row_info.bit_depth = png_ptr->bit_depth;
-   row_info.channels = png_ptr->channels;
-   row_info.pixel_depth = png_ptr->pixel_depth;
-   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
-
-   if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
-   {
-      if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
-         png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
-            png_ptr->prev_row + 1, png_ptr->row_buf[0]);
-      else
-         png_error(png_ptr, "bad adaptive filter value");
-   }
-
-   /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before
-    * 1.5.6, while the buffer really is this big in current versions of libpng
-    * it may not be in the future, so this was changed just to copy the
-    * interlaced row count:
-    */
-   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   if (png_ptr->transformations != 0)
-      png_do_read_transformations(png_ptr, &row_info);
-#endif
-
-   /* The transformed pixel depth should match the depth now in row_info. */
-   if (png_ptr->transformed_pixel_depth == 0)
-   {
-      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
-      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
-         png_error(png_ptr, "progressive row overflow");
-   }
-
-   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
-      png_error(png_ptr, "internal progressive row size calculation error");
-
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Expand interlaced rows to full size */
-   if (png_ptr->interlaced != 0 &&
-       (png_ptr->transformations & PNG_INTERLACE) != 0)
-   {
-      if (png_ptr->pass < 6)
-         png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass,
-             png_ptr->transformations);
-
-      switch (png_ptr->pass)
-      {
-         case 0:
-         {
-            int i;
-            for (i = 0; i < 8 && png_ptr->pass == 0; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr); /* Updates png_ptr->pass */
-            }
-
-            if (png_ptr->pass == 2) /* Pass 1 might be empty */
-            {
-               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
-               {
-                  png_push_have_row(png_ptr, NULL);
-                  png_read_push_finish_row(png_ptr);
-               }
-            }
-
-            if (png_ptr->pass == 4 && png_ptr->height <= 4)
-            {
-               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
-               {
-                  png_push_have_row(png_ptr, NULL);
-                  png_read_push_finish_row(png_ptr);
-               }
-            }
-
-            if (png_ptr->pass == 6 && png_ptr->height <= 4)
-            {
-                png_push_have_row(png_ptr, NULL);
-                png_read_push_finish_row(png_ptr);
-            }
-
-            break;
-         }
-
-         case 1:
-         {
-            int i;
-            for (i = 0; i < 8 && png_ptr->pass == 1; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            if (png_ptr->pass == 2) /* Skip top 4 generated rows */
-            {
-               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
-               {
-                  png_push_have_row(png_ptr, NULL);
-                  png_read_push_finish_row(png_ptr);
-               }
-            }
-
-            break;
-         }
-
-         case 2:
-         {
-            int i;
-
-            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
-            {
-               png_push_have_row(png_ptr, NULL);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            if (png_ptr->pass == 4) /* Pass 3 might be empty */
-            {
-               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
-               {
-                  png_push_have_row(png_ptr, NULL);
-                  png_read_push_finish_row(png_ptr);
-               }
-            }
-
-            break;
-         }
-
-         case 3:
-         {
-            int i;
-
-            for (i = 0; i < 4 && png_ptr->pass == 3; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            if (png_ptr->pass == 4) /* Skip top two generated rows */
-            {
-               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
-               {
-                  png_push_have_row(png_ptr, NULL);
-                  png_read_push_finish_row(png_ptr);
-               }
-            }
-
-            break;
-         }
-
-         case 4:
-         {
-            int i;
-
-            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
-            {
-               png_push_have_row(png_ptr, NULL);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            if (png_ptr->pass == 6) /* Pass 5 might be empty */
-            {
-               png_push_have_row(png_ptr, NULL);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            break;
-         }
-
-         case 5:
-         {
-            int i;
-
-            for (i = 0; i < 2 && png_ptr->pass == 5; i++)
-            {
-               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            if (png_ptr->pass == 6) /* Skip top generated row */
-            {
-               png_push_have_row(png_ptr, NULL);
-               png_read_push_finish_row(png_ptr);
-            }
-
-            break;
-         }
-
-         default:
-         case 6:
-         {
-            png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-            png_read_push_finish_row(png_ptr);
-
-            if (png_ptr->pass != 6)
-               break;
-
-            png_push_have_row(png_ptr, NULL);
-            png_read_push_finish_row(png_ptr);
-         }
-      }
-   }
-   else
-#endif
-   {
-      png_push_have_row(png_ptr, png_ptr->row_buf + 1);
-      png_read_push_finish_row(png_ptr);
-   }
-}
-
-void /* PRIVATE */
-png_read_push_finish_row(png_structrp png_ptr)
-{
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static const png_byte png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static const png_byte png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
-
-   /* Height of interlace block.  This is not currently used - if you need
-    * it, uncomment it here and in png.h
-   static const png_byte png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
-   */
-#endif
-
-   png_ptr->row_number++;
-   if (png_ptr->row_number < png_ptr->num_rows)
-      return;
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced != 0)
-   {
-      png_ptr->row_number = 0;
-      memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
-
-      do
-      {
-         png_ptr->pass++;
-         if ((png_ptr->pass == 1 && png_ptr->width < 5) ||
-             (png_ptr->pass == 3 && png_ptr->width < 3) ||
-             (png_ptr->pass == 5 && png_ptr->width < 2))
-            png_ptr->pass++;
-
-         if (png_ptr->pass > 7)
-            png_ptr->pass--;
-
-         if (png_ptr->pass >= 7)
-            break;
-
-         png_ptr->iwidth = (png_ptr->width +
-             png_pass_inc[png_ptr->pass] - 1 -
-             png_pass_start[png_ptr->pass]) /
-             png_pass_inc[png_ptr->pass];
-
-         if ((png_ptr->transformations & PNG_INTERLACE) != 0)
-            break;
-
-         png_ptr->num_rows = (png_ptr->height +
-             png_pass_yinc[png_ptr->pass] - 1 -
-             png_pass_ystart[png_ptr->pass]) /
-             png_pass_yinc[png_ptr->pass];
-
-      } while (png_ptr->iwidth == 0 || png_ptr->num_rows == 0);
-   }
-#endif /* READ_INTERLACING */
-}
-
-void /* PRIVATE */
-png_push_have_info(png_structrp png_ptr, png_inforp info_ptr)
-{
-   if (png_ptr->info_fn != NULL)
-      (*(png_ptr->info_fn))(png_ptr, info_ptr);
-}
-
-void /* PRIVATE */
-png_push_have_end(png_structrp png_ptr, png_inforp info_ptr)
-{
-   if (png_ptr->end_fn != NULL)
-      (*(png_ptr->end_fn))(png_ptr, info_ptr);
-}
-
-void /* PRIVATE */
-png_push_have_row(png_structrp png_ptr, png_bytep row)
-{
-   if (png_ptr->row_fn != NULL)
-      (*(png_ptr->row_fn))(png_ptr, row, png_ptr->row_number,
-          (int)png_ptr->pass);
-}
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-void PNGAPI
-png_progressive_combine_row(png_const_structrp png_ptr, png_bytep old_row,
-    png_const_bytep new_row)
-{
-   if (png_ptr == NULL)
-      return;
-
-   /* new_row is a flag here - if it is NULL then the app callback was called
-    * from an empty row (see the calls to png_struct::row_fn below), otherwise
-    * it must be png_ptr->row_buf+1
-    */
-   if (new_row != NULL)
-      png_combine_row(png_ptr, old_row, 1/*blocky display*/);
-}
-#endif /* READ_INTERLACING */
-
-void PNGAPI
-png_set_progressive_read_fn(png_structrp png_ptr, png_voidp progressive_ptr,
-    png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
-    png_progressive_end_ptr end_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->info_fn = info_fn;
-   png_ptr->row_fn = row_fn;
-   png_ptr->end_fn = end_fn;
-
-   png_set_read_fn(png_ptr, progressive_ptr, png_push_fill_buffer);
-}
-
-png_voidp PNGAPI
-png_get_progressive_ptr(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return NULL;
-
-   return png_ptr->io_ptr;
-}
-#endif /* PROGRESSIVE_READ */
diff --git a/dep/libpng/src/pngpriv.h b/dep/libpng/src/pngpriv.h
deleted file mode 100644
index 9bfdb7134..000000000
--- a/dep/libpng/src/pngpriv.h
+++ /dev/null
@@ -1,2221 +0,0 @@
-
-/* pngpriv.h - private declarations for use inside libpng
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* The symbols declared in this file (including the functions declared
- * as extern) are PRIVATE.  They are not part of the libpng public
- * interface, and are not recommended for use by regular applications.
- * Some of them may become public in the future; others may stay private,
- * change in an incompatible way, or even disappear.
- * Although the libpng users are not forbidden to include this header,
- * they should be well aware of the issues that may arise from doing so.
- */
-
-#ifndef PNGPRIV_H
-#define PNGPRIV_H
-
-/* Feature Test Macros.  The following are defined here to ensure that correctly
- * implemented libraries reveal the APIs libpng needs to build and hide those
- * that are not needed and potentially damaging to the compilation.
- *
- * Feature Test Macros must be defined before any system header is included (see
- * POSIX 1003.1 2.8.2 "POSIX Symbols."
- *
- * These macros only have an effect if the operating system supports either
- * POSIX 1003.1 or C99, or both.  On other operating systems (particularly
- * Windows/Visual Studio) there is no effect; the OS specific tests below are
- * still required (as of 2011-05-02.)
- */
-#ifndef _POSIX_SOURCE
-#  define _POSIX_SOURCE 1 /* Just the POSIX 1003.1 and C89 APIs */
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-/* Standard library headers not required by png.h: */
-#  include <stdlib.h>
-#  include <string.h>
-#endif
-
-#define PNGLIB_BUILD /*libpng is being built, not used*/
-
-/* If HAVE_CONFIG_H is defined during the build then the build system must
- * provide an appropriate "config.h" file on the include path.  The header file
- * must provide definitions as required below (search for "HAVE_CONFIG_H");
- * see configure.ac for more details of the requirements.  The macro
- * "PNG_NO_CONFIG_H" is provided for maintainers to test for dependencies on
- * 'configure'; define this macro to prevent the configure build including the
- * configure generated config.h.  Libpng is expected to compile without *any*
- * special build system support on a reasonably ANSI-C compliant system.
- */
-#if defined(HAVE_CONFIG_H) && !defined(PNG_NO_CONFIG_H)
-#  include <config.h>
-
-   /* Pick up the definition of 'restrict' from config.h if it was read: */
-#  define PNG_RESTRICT restrict
-#endif
-
-/* To support symbol prefixing it is necessary to know *before* including png.h
- * whether the fixed point (and maybe other) APIs are exported, because if they
- * are not internal definitions may be required.  This is handled below just
- * before png.h is included, but load the configuration now if it is available.
- */
-#ifndef PNGLCONF_H
-#  include "pnglibconf.h"
-#endif
-
-/* Local renames may change non-exported API functions from png.h */
-#if defined(PNG_PREFIX) && !defined(PNGPREFIX_H)
-#  include "pngprefix.h"
-#endif
-
-#ifdef PNG_USER_CONFIG
-#  include "pngusr.h"
-   /* These should have been defined in pngusr.h */
-#  ifndef PNG_USER_PRIVATEBUILD
-#    define PNG_USER_PRIVATEBUILD "Custom libpng build"
-#  endif
-#  ifndef PNG_USER_DLLFNAME_POSTFIX
-#    define PNG_USER_DLLFNAME_POSTFIX "Cb"
-#  endif
-#endif
-
-/* Compile time options.
- * =====================
- * In a multi-arch build the compiler may compile the code several times for the
- * same object module, producing different binaries for different architectures.
- * When this happens configure-time setting of the target host options cannot be
- * done and this interferes with the handling of the ARM NEON optimizations, and
- * possibly other similar optimizations.  Put additional tests here; in general
- * this is needed when the same option can be changed at both compile time and
- * run time depending on the target OS (i.e. iOS vs Android.)
- *
- * NOTE: symbol prefixing does not pass $(CFLAGS) to the preprocessor, because
- * this is not possible with certain compilers (Oracle SUN OS CC), as a result
- * it is necessary to ensure that all extern functions that *might* be used
- * regardless of $(CFLAGS) get declared in this file.  The test on __ARM_NEON__
- * below is one example of this behavior because it is controlled by the
- * presence or not of -mfpu=neon on the GCC command line, it is possible to do
- * this in $(CC), e.g. "CC=gcc -mfpu=neon", but people who build libpng rarely
- * do this.
- */
-#ifndef PNG_ARM_NEON_OPT
-   /* ARM NEON optimizations are being controlled by the compiler settings,
-    * typically the target FPU.  If the FPU has been set to NEON (-mfpu=neon
-    * with GCC) then the compiler will define __ARM_NEON__ and we can rely
-    * unconditionally on NEON instructions not crashing, otherwise we must
-    * disable use of NEON instructions.
-    *
-    * NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they
-    * can only be turned on automatically if that is supported too.  If
-    * PNG_ARM_NEON_OPT is set in CPPFLAGS (to >0) then arm/arm_init.c will fail
-    * to compile with an appropriate #error if ALIGNED_MEMORY has been turned
-    * off.
-    *
-    * Note that gcc-4.9 defines __ARM_NEON instead of the deprecated
-    * __ARM_NEON__, so we check both variants.
-    *
-    * To disable ARM_NEON optimizations entirely, and skip compiling the
-    * associated assembler code, pass --enable-arm-neon=no to configure
-    * or put -DPNG_ARM_NEON_OPT=0 in CPPFLAGS.
-    */
-#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \
-   defined(PNG_ALIGNED_MEMORY_SUPPORTED)
-#     define PNG_ARM_NEON_OPT 2
-#  else
-#     define PNG_ARM_NEON_OPT 0
-#  endif
-#endif
-
-#if PNG_ARM_NEON_OPT > 0
-   /* NEON optimizations are to be at least considered by libpng, so enable the
-    * callbacks to do this.
-    */
-#  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_neon
-
-   /* By default the 'intrinsics' code in arm/filter_neon_intrinsics.c is used
-    * if possible - if __ARM_NEON__ is set and the compiler version is not known
-    * to be broken.  This is controlled by PNG_ARM_NEON_IMPLEMENTATION which can
-    * be:
-    *
-    *    1  The intrinsics code (the default with __ARM_NEON__)
-    *    2  The hand coded assembler (the default without __ARM_NEON__)
-    *
-    * It is possible to set PNG_ARM_NEON_IMPLEMENTATION in CPPFLAGS, however
-    * this is *NOT* supported and may cease to work even after a minor revision
-    * to libpng.  It *is* valid to do this for testing purposes, e.g. speed
-    * testing or a new compiler, but the results should be communicated to the
-    * libpng implementation list for incorporation in the next minor release.
-    */
-#  ifndef PNG_ARM_NEON_IMPLEMENTATION
-#     if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#        if defined(__clang__)
-            /* At present it is unknown by the libpng developers which versions
-             * of clang support the intrinsics, however some or perhaps all
-             * versions do not work with the assembler so this may be
-             * irrelevant, so just use the default (do nothing here.)
-             */
-#        elif defined(__GNUC__)
-            /* GCC 4.5.4 NEON support is known to be broken.  4.6.3 is known to
-             * work, so if this *is* GCC, or G++, look for a version >4.5
-             */
-#           if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)
-#              define PNG_ARM_NEON_IMPLEMENTATION 2
-#           endif /* no GNUC support */
-#        endif /* __GNUC__ */
-#     else /* !defined __ARM_NEON__ */
-         /* The 'intrinsics' code simply won't compile without this -mfpu=neon:
-          */
-#        if !defined(__aarch64__) && !defined(_M_ARM64)
-            /* The assembler code currently does not work on ARM64 */
-#          define PNG_ARM_NEON_IMPLEMENTATION 2
-#        endif /* __aarch64__ */
-#     endif /* __ARM_NEON__ */
-#  endif /* !PNG_ARM_NEON_IMPLEMENTATION */
-
-#  ifndef PNG_ARM_NEON_IMPLEMENTATION
-      /* Use the intrinsics code by default. */
-#     define PNG_ARM_NEON_IMPLEMENTATION 1
-#  endif
-#else /* PNG_ARM_NEON_OPT == 0 */
-#     define PNG_ARM_NEON_IMPLEMENTATION 0
-#endif /* PNG_ARM_NEON_OPT > 0 */
-
-#ifndef PNG_MIPS_MSA_OPT
-#  if defined(__mips_msa) && (__mips_isa_rev >= 5) && \
-   defined(PNG_ALIGNED_MEMORY_SUPPORTED)
-#     define PNG_MIPS_MSA_OPT 2
-#  else
-#     define PNG_MIPS_MSA_OPT 0
-#  endif
-#endif
-
-#ifndef PNG_MIPS_MMI_OPT
-#  ifdef PNG_MIPS_MMI
-#    if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) && \
-     defined(PNG_ALIGNED_MEMORY_SUPPORTED)
-#       define PNG_MIPS_MMI_OPT 1
-#    else
-#       define PNG_MIPS_MMI_OPT 0
-#    endif
-#  else
-#    define PNG_MIPS_MMI_OPT 0
-#  endif
-#endif
-
-#ifndef PNG_POWERPC_VSX_OPT
-#  if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
-#     define PNG_POWERPC_VSX_OPT 2
-#  else
-#     define PNG_POWERPC_VSX_OPT 0
-#  endif
-#endif
-
-#ifndef PNG_LOONGARCH_LSX_OPT
-#  if defined(__loongarch_sx)
-#     define PNG_LOONGARCH_LSX_OPT 1
-#  else
-#     define PNG_LOONGARCH_LSX_OPT 0
-#  endif
-#endif
-
-#ifndef PNG_INTEL_SSE_OPT
-#   ifdef PNG_INTEL_SSE
-      /* Only check for SSE if the build configuration has been modified to
-       * enable SSE optimizations.  This means that these optimizations will
-       * be off by default.  See contrib/intel for more details.
-       */
-#      if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
-       defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
-       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
-#         define PNG_INTEL_SSE_OPT 1
-#      else
-#         define PNG_INTEL_SSE_OPT 0
-#      endif
-#   else
-#      define PNG_INTEL_SSE_OPT 0
-#   endif
-#endif
-
-#if PNG_INTEL_SSE_OPT > 0
-#   ifndef PNG_INTEL_SSE_IMPLEMENTATION
-#      if defined(__SSE4_1__) || defined(__AVX__)
-          /* We are not actually using AVX, but checking for AVX is the best
-             way we can detect SSE4.1 and SSSE3 on MSVC.
-          */
-#         define PNG_INTEL_SSE_IMPLEMENTATION 3
-#      elif defined(__SSSE3__)
-#         define PNG_INTEL_SSE_IMPLEMENTATION 2
-#      elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
-       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
-#         define PNG_INTEL_SSE_IMPLEMENTATION 1
-#      else
-#         define PNG_INTEL_SSE_IMPLEMENTATION 0
-#      endif
-#   endif
-
-#   if PNG_INTEL_SSE_IMPLEMENTATION > 0
-#      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
-#   endif
-#else
-#   define PNG_INTEL_SSE_IMPLEMENTATION 0
-#endif
-
-#if PNG_MIPS_MSA_OPT > 0
-#  ifndef PNG_MIPS_MSA_IMPLEMENTATION
-#     if defined(__mips_msa)
-#        if defined(__clang__)
-#        elif defined(__GNUC__)
-#           if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
-#              define PNG_MIPS_MSA_IMPLEMENTATION 2
-#           endif /* no GNUC support */
-#        endif /* __GNUC__ */
-#     else /* !defined __mips_msa */
-#        define PNG_MIPS_MSA_IMPLEMENTATION 2
-#     endif /* __mips_msa */
-#  endif /* !PNG_MIPS_MSA_IMPLEMENTATION */
-
-#  ifndef PNG_MIPS_MSA_IMPLEMENTATION
-#     define PNG_MIPS_MSA_IMPLEMENTATION 1
-#     define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
-#  endif
-#else
-#  define PNG_MIPS_MSA_IMPLEMENTATION 0
-#endif /* PNG_MIPS_MSA_OPT > 0 */
-
-#if PNG_MIPS_MMI_OPT > 0
-#  ifndef PNG_MIPS_MMI_IMPLEMENTATION
-#     if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64)
-#        define PNG_MIPS_MMI_IMPLEMENTATION 2
-#     else /* !defined __mips_loongson_mmi  || _MIPS_SIM != _ABI64 */
-#        define PNG_MIPS_MMI_IMPLEMENTATION 0
-#     endif /* __mips_loongson_mmi  && _MIPS_SIM == _ABI64 */
-#  endif /* !PNG_MIPS_MMI_IMPLEMENTATION */
-
-#   if PNG_MIPS_MMI_IMPLEMENTATION > 0
-#      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
-#   endif
-#else
-#   define PNG_MIPS_MMI_IMPLEMENTATION 0
-#endif /* PNG_MIPS_MMI_OPT > 0 */
-
-#if PNG_POWERPC_VSX_OPT > 0
-#  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
-#  define PNG_POWERPC_VSX_IMPLEMENTATION 1
-#else
-#  define PNG_POWERPC_VSX_IMPLEMENTATION 0
-#endif
-
-#if PNG_LOONGARCH_LSX_OPT > 0
-#   define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_lsx
-#   define PNG_LOONGARCH_LSX_IMPLEMENTATION 1
-#else
-#   define PNG_LOONGARCH_LSX_IMPLEMENTATION 0
-#endif
-
-/* Is this a build of a DLL where compilation of the object modules requires
- * different preprocessor settings to those required for a simple library?  If
- * so PNG_BUILD_DLL must be set.
- *
- * If libpng is used inside a DLL but that DLL does not export the libpng APIs
- * PNG_BUILD_DLL must not be set.  To avoid the code below kicking in build a
- * static library of libpng then link the DLL against that.
- */
-#ifndef PNG_BUILD_DLL
-#  ifdef DLL_EXPORT
-      /* This is set by libtool when files are compiled for a DLL; libtool
-       * always compiles twice, even on systems where it isn't necessary.  Set
-       * PNG_BUILD_DLL in case it is necessary:
-       */
-#     define PNG_BUILD_DLL
-#  else
-#     ifdef _WINDLL
-         /* This is set by the Microsoft Visual Studio IDE in projects that
-          * build a DLL.  It can't easily be removed from those projects (it
-          * isn't visible in the Visual Studio UI) so it is a fairly reliable
-          * indication that PNG_IMPEXP needs to be set to the DLL export
-          * attributes.
-          */
-#        define PNG_BUILD_DLL
-#     else
-#        ifdef __DLL__
-            /* This is set by the Borland C system when compiling for a DLL
-             * (as above.)
-             */
-#           define PNG_BUILD_DLL
-#        else
-            /* Add additional compiler cases here. */
-#        endif
-#     endif
-#  endif
-#endif /* Setting PNG_BUILD_DLL if required */
-
-/* See pngconf.h for more details: the builder of the library may set this on
- * the command line to the right thing for the specific compilation system or it
- * may be automagically set above (at present we know of no system where it does
- * need to be set on the command line.)
- *
- * PNG_IMPEXP must be set here when building the library to prevent pngconf.h
- * setting it to the "import" setting for a DLL build.
- */
-#ifndef PNG_IMPEXP
-#  ifdef PNG_BUILD_DLL
-#     define PNG_IMPEXP PNG_DLL_EXPORT
-#  else
-      /* Not building a DLL, or the DLL doesn't require specific export
-       * definitions.
-       */
-#     define PNG_IMPEXP
-#  endif
-#endif
-
-/* No warnings for private or deprecated functions in the build: */
-#ifndef PNG_DEPRECATED
-#  define PNG_DEPRECATED
-#endif
-#ifndef PNG_PRIVATE
-#  define PNG_PRIVATE
-#endif
-
-/* Symbol preprocessing support.
- *
- * To enable listing global, but internal, symbols the following macros should
- * always be used to declare an extern data or function object in this file.
- */
-#ifndef PNG_INTERNAL_DATA
-#  define PNG_INTERNAL_DATA(type, name, array) PNG_LINKAGE_DATA type name array
-#endif
-
-#ifndef PNG_INTERNAL_FUNCTION
-#  define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\
-      PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_EMPTY attributes)
-#endif
-
-#ifndef PNG_INTERNAL_CALLBACK
-#  define PNG_INTERNAL_CALLBACK(type, name, args, attributes)\
-      PNG_LINKAGE_CALLBACK PNG_FUNCTION(type, (PNGCBAPI name), args,\
-         PNG_EMPTY attributes)
-#endif
-
-/* If floating or fixed point APIs are disabled they may still be compiled
- * internally.  To handle this make sure they are declared as the appropriate
- * internal extern function (otherwise the symbol prefixing stuff won't work and
- * the functions will be used without definitions.)
- *
- * NOTE: although all the API functions are declared here they are not all
- * actually built!  Because the declarations are still made it is necessary to
- * fake out types that they depend on.
- */
-#ifndef PNG_FP_EXPORT
-#  ifndef PNG_FLOATING_POINT_SUPPORTED
-#     define PNG_FP_EXPORT(ordinal, type, name, args)\
-         PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY);
-#     ifndef PNG_VERSION_INFO_ONLY
-         typedef struct png_incomplete png_double;
-         typedef png_double*           png_doublep;
-         typedef const png_double*     png_const_doublep;
-         typedef png_double**          png_doublepp;
-#     endif
-#  endif
-#endif
-#ifndef PNG_FIXED_EXPORT
-#  ifndef PNG_FIXED_POINT_SUPPORTED
-#     define PNG_FIXED_EXPORT(ordinal, type, name, args)\
-         PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY);
-#  endif
-#endif
-
-#include "png.h"
-
-/* pngconf.h does not set PNG_DLL_EXPORT unless it is required, so: */
-#ifndef PNG_DLL_EXPORT
-#  define PNG_DLL_EXPORT
-#endif
-
-/* This is a global switch to set the compilation for an installed system
- * (a release build).  It can be set for testing debug builds to ensure that
- * they will compile when the build type is switched to RC or STABLE, the
- * default is just to use PNG_LIBPNG_BUILD_BASE_TYPE.  Set this in CPPFLAGS
- * with either:
- *
- *   -DPNG_RELEASE_BUILD Turns on the release compile path
- *   -DPNG_RELEASE_BUILD=0 Turns it off
- * or in your pngusr.h with
- *   #define PNG_RELEASE_BUILD=1 Turns on the release compile path
- *   #define PNG_RELEASE_BUILD=0 Turns it off
- */
-#ifndef PNG_RELEASE_BUILD
-#  define PNG_RELEASE_BUILD (PNG_LIBPNG_BUILD_BASE_TYPE >= PNG_LIBPNG_BUILD_RC)
-#endif
-
-/* SECURITY and SAFETY:
- *
- * libpng is built with support for internal limits on image dimensions and
- * memory usage.  These are documented in scripts/pnglibconf.dfa of the
- * source and recorded in the machine generated header file pnglibconf.h.
- */
-
-/* If you are running on a machine where you cannot allocate more
- * than 64K of memory at once, uncomment this.  While libpng will not
- * normally need that much memory in a chunk (unless you load up a very
- * large file), zlib needs to know how big of a chunk it can use, and
- * libpng thus makes sure to check any memory allocation to verify it
- * will fit into memory.
- *
- * zlib provides 'MAXSEG_64K' which, if defined, indicates the
- * same limit and pngconf.h (already included) sets the limit
- * if certain operating systems are detected.
- */
-#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
-#  define PNG_MAX_MALLOC_64K
-#endif
-
-#ifndef PNG_UNUSED
-/* Unused formal parameter warnings are silenced using the following macro
- * which is expected to have no bad effects on performance (optimizing
- * compilers will probably remove it entirely).  Note that if you replace
- * it with something other than whitespace, you must include the terminating
- * semicolon.
- */
-#  define PNG_UNUSED(param) (void)param;
-#endif
-
-/* Just a little check that someone hasn't tried to define something
- * contradictory.
- */
-#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K)
-#  undef PNG_ZBUF_SIZE
-#  define PNG_ZBUF_SIZE 65536L
-#endif
-
-/* If warnings or errors are turned off the code is disabled or redirected here.
- * From 1.5.4 functions have been added to allow very limited formatting of
- * error and warning messages - this code will also be disabled here.
- */
-#ifdef PNG_WARNINGS_SUPPORTED
-#  define PNG_WARNING_PARAMETERS(p) png_warning_parameters p;
-#else
-#  define png_warning_parameter(p,number,string) ((void)0)
-#  define png_warning_parameter_unsigned(p,number,format,value) ((void)0)
-#  define png_warning_parameter_signed(p,number,format,value) ((void)0)
-#  define png_formatted_warning(pp,p,message) ((void)(pp))
-#  define PNG_WARNING_PARAMETERS(p)
-#endif
-#ifndef PNG_ERROR_TEXT_SUPPORTED
-#  define png_fixed_error(s1,s2) png_err(s1)
-#endif
-
-/* Some fixed point APIs are still required even if not exported because
- * they get used by the corresponding floating point APIs.  This magic
- * deals with this:
- */
-#ifdef PNG_FIXED_POINT_SUPPORTED
-#  define PNGFAPI PNGAPI
-#else
-#  define PNGFAPI /* PRIVATE */
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-/* Other defines specific to compilers can go here.  Try to keep
- * them inside an appropriate ifdef/endif pair for portability.
- */
-
-/* C allows up-casts from (void*) to any pointer and (const void*) to any
- * pointer to a const object.  C++ regards this as a type error and requires an
- * explicit, static, cast and provides the static_cast<> rune to ensure that
- * const is not cast away.
- */
-#ifdef __cplusplus
-#  define png_voidcast(type, value) static_cast<type>(value)
-#  define png_constcast(type, value) const_cast<type>(value)
-#  define png_aligncast(type, value) \
-   static_cast<type>(static_cast<void*>(value))
-#  define png_aligncastconst(type, value) \
-   static_cast<type>(static_cast<const void*>(value))
-#else
-#  define png_voidcast(type, value) (value)
-#  define png_constcast(type, value) ((type)(void*)(const void*)(value))
-#  define png_aligncast(type, value) ((void*)(value))
-#  define png_aligncastconst(type, value) ((const void*)(value))
-#endif /* __cplusplus */
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) ||\
-    defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)
-   /* png.c requires the following ANSI-C constants if the conversion of
-    * floating point to ASCII is implemented therein:
-    *
-    *  DBL_DIG  Maximum number of decimal digits (can be set to any constant)
-    *  DBL_MIN  Smallest normalized fp number (can be set to an arbitrary value)
-    *  DBL_MAX  Maximum floating point number (can be set to an arbitrary value)
-    */
-#  include <float.h>
-
-#  include <math.h>
-
-#  if defined(_AMIGA) && defined(__SASC) && defined(_M68881)
-   /* Amiga SAS/C: We must include builtin FPU functions when compiling using
-    * MATH=68881
-    */
-#    include <m68881.h>
-#  endif
-#endif
-
-/* This provides the non-ANSI (far) memory allocation routines. */
-#if defined(__TURBOC__) && defined(__MSDOS__)
-#  include <mem.h>
-#  include <alloc.h>
-#endif
-
-#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
-#  include <windows.h>
-#endif
-#endif /* PNG_VERSION_INFO_ONLY */
-
-/* Moved here around 1.5.0beta36 from pngconf.h */
-/* Users may want to use these so they are not private.  Any library
- * functions that are passed far data must be model-independent.
- */
-
-/* Platform-independent functions */
-#ifndef PNG_ABORT
-#  define PNG_ABORT() abort()
-#endif
-
-/* These macros may need to be architecture dependent. */
-#define PNG_ALIGN_NONE      0 /* do not use data alignment */
-#define PNG_ALIGN_ALWAYS    1 /* assume unaligned accesses are OK */
-#ifdef offsetof
-#  define PNG_ALIGN_OFFSET  2 /* use offsetof to determine alignment */
-#else
-#  define PNG_ALIGN_OFFSET -1 /* prevent the use of this */
-#endif
-#define PNG_ALIGN_SIZE      3 /* use sizeof to determine alignment */
-
-#ifndef PNG_ALIGN_TYPE
-   /* Default to using aligned access optimizations and requiring alignment to a
-    * multiple of the data type size.  Override in a compiler specific fashion
-    * if necessary by inserting tests here:
-    */
-#  define PNG_ALIGN_TYPE PNG_ALIGN_SIZE
-#endif
-
-#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE
-   /* This is used because in some compiler implementations non-aligned
-    * structure members are supported, so the offsetof approach below fails.
-    * Set PNG_ALIGN_SIZE=0 for compiler combinations where unaligned access
-    * is good for performance.  Do not do this unless you have tested the
-    * result and understand it.
-    */
-#  define png_alignof(type) (sizeof(type))
-#else
-#  if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET
-#    define png_alignof(type) offsetof(struct{char c; type t;}, t)
-#  else
-#    if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS
-#      define png_alignof(type) 1
-#    endif
-     /* Else leave png_alignof undefined to prevent use thereof */
-#  endif
-#endif
-
-/* This implicitly assumes alignment is always a multiple of 2. */
-#ifdef png_alignof
-#  define png_isaligned(ptr, type) \
-   (((type)(size_t)((const void*)(ptr)) & (type)(png_alignof(type)-1)) == 0)
-#else
-#  define png_isaligned(ptr, type) 0
-#endif
-
-/* End of memory model/platform independent support */
-/* End of 1.5.0beta36 move from pngconf.h */
-
-/* CONSTANTS and UTILITY MACROS
- * These are used internally by libpng and not exposed in the API
- */
-
-/* Various modes of operation.  Note that after an init, mode is set to
- * zero automatically when the structure is created.  Three of these
- * are defined in png.h because they need to be visible to applications
- * that call png_set_unknown_chunk().
- */
-/* #define PNG_HAVE_IHDR            0x01U (defined in png.h) */
-/* #define PNG_HAVE_PLTE            0x02U (defined in png.h) */
-#define PNG_HAVE_IDAT               0x04U
-/* #define PNG_AFTER_IDAT           0x08U (defined in png.h) */
-#define PNG_HAVE_IEND               0x10U
-                   /*               0x20U (unused) */
-                   /*               0x40U (unused) */
-                   /*               0x80U (unused) */
-#define PNG_HAVE_CHUNK_HEADER      0x100U
-#define PNG_WROTE_tIME             0x200U
-#define PNG_WROTE_INFO_BEFORE_PLTE 0x400U
-#define PNG_BACKGROUND_IS_GRAY     0x800U
-#define PNG_HAVE_PNG_SIGNATURE    0x1000U
-#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000U /* Have another chunk after IDAT */
-#define PNG_WROTE_eXIf            0x4000U
-#define PNG_IS_READ_STRUCT        0x8000U /* Else is a write struct */
-
-/* Flags for the transformations the PNG library does on the image data */
-#define PNG_BGR                 0x0001U
-#define PNG_INTERLACE           0x0002U
-#define PNG_PACK                0x0004U
-#define PNG_SHIFT               0x0008U
-#define PNG_SWAP_BYTES          0x0010U
-#define PNG_INVERT_MONO         0x0020U
-#define PNG_QUANTIZE            0x0040U
-#define PNG_COMPOSE             0x0080U    /* Was PNG_BACKGROUND */
-#define PNG_BACKGROUND_EXPAND   0x0100U
-#define PNG_EXPAND_16           0x0200U    /* Added to libpng 1.5.2 */
-#define PNG_16_TO_8             0x0400U    /* Becomes 'chop' in 1.5.4 */
-#define PNG_RGBA                0x0800U
-#define PNG_EXPAND              0x1000U
-#define PNG_GAMMA               0x2000U
-#define PNG_GRAY_TO_RGB         0x4000U
-#define PNG_FILLER              0x8000U
-#define PNG_PACKSWAP           0x10000U
-#define PNG_SWAP_ALPHA         0x20000U
-#define PNG_STRIP_ALPHA        0x40000U
-#define PNG_INVERT_ALPHA       0x80000U
-#define PNG_USER_TRANSFORM    0x100000U
-#define PNG_RGB_TO_GRAY_ERR   0x200000U
-#define PNG_RGB_TO_GRAY_WARN  0x400000U
-#define PNG_RGB_TO_GRAY       0x600000U /* two bits, RGB_TO_GRAY_ERR|WARN */
-#define PNG_ENCODE_ALPHA      0x800000U /* Added to libpng-1.5.4 */
-#define PNG_ADD_ALPHA        0x1000000U /* Added to libpng-1.2.7 */
-#define PNG_EXPAND_tRNS      0x2000000U /* Added to libpng-1.2.9 */
-#define PNG_SCALE_16_TO_8    0x4000000U /* Added to libpng-1.5.4 */
-                       /*    0x8000000U unused */
-                       /*   0x10000000U unused */
-                       /*   0x20000000U unused */
-                       /*   0x40000000U unused */
-/* Flags for png_create_struct */
-#define PNG_STRUCT_PNG   0x0001U
-#define PNG_STRUCT_INFO  0x0002U
-
-/* Flags for the png_ptr->flags rather than declaring a byte for each one */
-#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY     0x0001U
-#define PNG_FLAG_ZSTREAM_INITIALIZED      0x0002U /* Added to libpng-1.6.0 */
-                                  /*      0x0004U    unused */
-#define PNG_FLAG_ZSTREAM_ENDED            0x0008U /* Added to libpng-1.6.0 */
-                                  /*      0x0010U    unused */
-                                  /*      0x0020U    unused */
-#define PNG_FLAG_ROW_INIT                 0x0040U
-#define PNG_FLAG_FILLER_AFTER             0x0080U
-#define PNG_FLAG_CRC_ANCILLARY_USE        0x0100U
-#define PNG_FLAG_CRC_ANCILLARY_NOWARN     0x0200U
-#define PNG_FLAG_CRC_CRITICAL_USE         0x0400U
-#define PNG_FLAG_CRC_CRITICAL_IGNORE      0x0800U
-#define PNG_FLAG_ASSUME_sRGB              0x1000U /* Added to libpng-1.5.4 */
-#define PNG_FLAG_OPTIMIZE_ALPHA           0x2000U /* Added to libpng-1.5.4 */
-#define PNG_FLAG_DETECT_UNINITIALIZED     0x4000U /* Added to libpng-1.5.4 */
-/* #define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000U */
-/* #define PNG_FLAG_KEEP_UNSAFE_CHUNKS      0x10000U */
-#define PNG_FLAG_LIBRARY_MISMATCH        0x20000U
-#define PNG_FLAG_STRIP_ERROR_NUMBERS     0x40000U
-#define PNG_FLAG_STRIP_ERROR_TEXT        0x80000U
-#define PNG_FLAG_BENIGN_ERRORS_WARN     0x100000U /* Added to libpng-1.4.0 */
-#define PNG_FLAG_APP_WARNINGS_WARN      0x200000U /* Added to libpng-1.6.0 */
-#define PNG_FLAG_APP_ERRORS_WARN        0x400000U /* Added to libpng-1.6.0 */
-                                  /*    0x800000U    unused */
-                                  /*   0x1000000U    unused */
-                                  /*   0x2000000U    unused */
-                                  /*   0x4000000U    unused */
-                                  /*   0x8000000U    unused */
-                                  /*  0x10000000U    unused */
-                                  /*  0x20000000U    unused */
-                                  /*  0x40000000U    unused */
-
-#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
-                                     PNG_FLAG_CRC_ANCILLARY_NOWARN)
-
-#define PNG_FLAG_CRC_CRITICAL_MASK  (PNG_FLAG_CRC_CRITICAL_USE | \
-                                     PNG_FLAG_CRC_CRITICAL_IGNORE)
-
-#define PNG_FLAG_CRC_MASK           (PNG_FLAG_CRC_ANCILLARY_MASK | \
-                                     PNG_FLAG_CRC_CRITICAL_MASK)
-
-/* Save typing and make code easier to understand */
-
-#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \
-   abs((int)((c1).green) - (int)((c2).green)) + \
-   abs((int)((c1).blue) - (int)((c2).blue)))
-
-/* Added to libpng-1.6.0: scale a 16-bit value in the range 0..65535 to 0..255
- * by dividing by 257 *with rounding*.  This macro is exact for the given range.
- * See the discourse in pngrtran.c png_do_scale_16_to_8.  The values in the
- * macro were established by experiment (modifying the added value).  The macro
- * has a second variant that takes a value already scaled by 255 and divides by
- * 65535 - this has a maximum error of .502.  Over the range 0..65535*65535 it
- * only gives off-by-one errors and only for 0.5% (1 in 200) of the values.
- */
-#define PNG_DIV65535(v24) (((v24) + 32895) >> 16)
-#define PNG_DIV257(v16) PNG_DIV65535((png_uint_32)(v16) * 255)
-
-/* Added to libpng-1.2.6 JB */
-#define PNG_ROWBYTES(pixel_bits, width) \
-    ((pixel_bits) >= 8 ? \
-    ((size_t)(width) * (((size_t)(pixel_bits)) >> 3)) : \
-    (( ((size_t)(width) * ((size_t)(pixel_bits))) + 7) >> 3) )
-
-/* This returns the number of trailing bits in the last byte of a row, 0 if the
- * last byte is completely full of pixels.  It is, in principle, (pixel_bits x
- * width) % 8, but that would overflow for large 'width'.  The second macro is
- * the same except that it returns the number of unused bits in the last byte;
- * (8-TRAILBITS), but 0 when TRAILBITS is 0.
- *
- * NOTE: these macros are intended to be self-evidently correct and never
- * overflow on the assumption that pixel_bits is in the range 0..255.  The
- * arguments are evaluated only once and they can be signed (e.g. as a result of
- * the integral promotions).  The result of the expression always has type
- * (png_uint_32), however the compiler always knows it is in the range 0..7.
- */
-#define PNG_TRAILBITS(pixel_bits, width) \
-    (((pixel_bits) * ((width) % (png_uint_32)8)) % 8)
-
-#define PNG_PADBITS(pixel_bits, width) \
-    ((8 - PNG_TRAILBITS(pixel_bits, width)) % 8)
-
-/* PNG_OUT_OF_RANGE returns true if value is outside the range
- * ideal-delta..ideal+delta.  Each argument is evaluated twice.
- * "ideal" and "delta" should be constants, normally simple
- * integers, "value" a variable. Added to libpng-1.2.6 JB
- */
-#define PNG_OUT_OF_RANGE(value, ideal, delta) \
-   ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) )
-
-/* Conversions between fixed and floating point, only defined if
- * required (to make sure the code doesn't accidentally use float
- * when it is supposedly disabled.)
- */
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-/* The floating point conversion can't overflow, though it can and
- * does lose accuracy relative to the original fixed point value.
- * In practice this doesn't matter because png_fixed_point only
- * stores numbers with very low precision.  The png_ptr and s
- * arguments are unused by default but are there in case error
- * checking becomes a requirement.
- */
-#define png_float(png_ptr, fixed, s) (.00001 * (fixed))
-
-/* The fixed point conversion performs range checking and evaluates
- * its argument multiple times, so must be used with care.  The
- * range checking uses the PNG specification values for a signed
- * 32-bit fixed point value except that the values are deliberately
- * rounded-to-zero to an integral value - 21474 (21474.83 is roughly
- * (2^31-1) * 100000). 's' is a string that describes the value being
- * converted.
- *
- * NOTE: this macro will raise a png_error if the range check fails,
- * therefore it is normally only appropriate to use this on values
- * that come from API calls or other sources where an out of range
- * error indicates a programming error, not a data error!
- *
- * NOTE: by default this is off - the macro is not used - because the
- * function call saves a lot of code.
- */
-#ifdef PNG_FIXED_POINT_MACRO_SUPPORTED
-#define png_fixed(png_ptr, fp, s) ((fp) <= 21474 && (fp) >= -21474 ?\
-    ((png_fixed_point)(100000 * (fp))) : (png_fixed_error(png_ptr, s),0))
-#endif
-/* else the corresponding function is defined below, inside the scope of the
- * cplusplus test.
- */
-#endif
-
-/* Constants for known chunk types.  If you need to add a chunk, define the name
- * here.  For historical reasons these constants have the form png_<name>; i.e.
- * the prefix is lower case.  Please use decimal values as the parameters to
- * match the ISO PNG specification and to avoid relying on the C locale
- * interpretation of character values.
- *
- * Prior to 1.5.6 these constants were strings, as of 1.5.6 png_uint_32 values
- * are computed and a new macro (PNG_STRING_FROM_CHUNK) added to allow a string
- * to be generated if required.
- *
- * PNG_32b correctly produces a value shifted by up to 24 bits, even on
- * architectures where (int) is only 16 bits.
- */
-#define PNG_32b(b,s) ((png_uint_32)(b) << (s))
-#define PNG_U32(b1,b2,b3,b4) \
-   (PNG_32b(b1,24) | PNG_32b(b2,16) | PNG_32b(b3,8) | PNG_32b(b4,0))
-
-/* Constants for known chunk types.
- *
- * MAINTAINERS: If you need to add a chunk, define the name here.
- * For historical reasons these constants have the form png_<name>; i.e.
- * the prefix is lower case.  Please use decimal values as the parameters to
- * match the ISO PNG specification and to avoid relying on the C locale
- * interpretation of character values.  Please keep the list sorted.
- *
- * Notice that PNG_U32 is used to define a 32-bit value for the 4 byte chunk
- * type.  In fact the specification does not express chunk types this way,
- * however using a 32-bit value means that the chunk type can be read from the
- * stream using exactly the same code as used for a 32-bit unsigned value and
- * can be examined far more efficiently (using one arithmetic compare).
- *
- * Prior to 1.5.6 the chunk type constants were expressed as C strings.  The
- * libpng API still uses strings for 'unknown' chunks and a macro,
- * PNG_STRING_FROM_CHUNK, allows a string to be generated if required.  Notice
- * that for portable code numeric values must still be used; the string "IHDR"
- * is not portable and neither is PNG_U32('I', 'H', 'D', 'R').
- *
- * In 1.7.0 the definitions will be made public in png.h to avoid having to
- * duplicate the same definitions in application code.
- */
-#define png_IDAT PNG_U32( 73,  68,  65,  84)
-#define png_IEND PNG_U32( 73,  69,  78,  68)
-#define png_IHDR PNG_U32( 73,  72,  68,  82)
-#define png_PLTE PNG_U32( 80,  76,  84,  69)
-#define png_bKGD PNG_U32( 98,  75,  71,  68)
-#define png_cHRM PNG_U32( 99,  72,  82,  77)
-#define png_eXIf PNG_U32(101,  88,  73, 102) /* registered July 2017 */
-#define png_fRAc PNG_U32(102,  82,  65,  99) /* registered, not defined */
-#define png_gAMA PNG_U32(103,  65,  77,  65)
-#define png_gIFg PNG_U32(103,  73,  70, 103)
-#define png_gIFt PNG_U32(103,  73,  70, 116) /* deprecated */
-#define png_gIFx PNG_U32(103,  73,  70, 120)
-#define png_hIST PNG_U32(104,  73,  83,  84)
-#define png_iCCP PNG_U32(105,  67,  67,  80)
-#define png_iTXt PNG_U32(105,  84,  88, 116)
-#define png_oFFs PNG_U32(111,  70,  70, 115)
-#define png_pCAL PNG_U32(112,  67,  65,  76)
-#define png_pHYs PNG_U32(112,  72,  89, 115)
-#define png_sBIT PNG_U32(115,  66,  73,  84)
-#define png_sCAL PNG_U32(115,  67,  65,  76)
-#define png_sPLT PNG_U32(115,  80,  76,  84)
-#define png_sRGB PNG_U32(115,  82,  71,  66)
-#define png_sTER PNG_U32(115,  84,  69,  82)
-#define png_tEXt PNG_U32(116,  69,  88, 116)
-#define png_tIME PNG_U32(116,  73,  77,  69)
-#define png_tRNS PNG_U32(116,  82,  78,  83)
-#define png_zTXt PNG_U32(122,  84,  88, 116)
-
-/* The following will work on (signed char*) strings, whereas the get_uint_32
- * macro will fail on top-bit-set values because of the sign extension.
- */
-#define PNG_CHUNK_FROM_STRING(s)\
-   PNG_U32(0xff & (s)[0], 0xff & (s)[1], 0xff & (s)[2], 0xff & (s)[3])
-
-/* This uses (char), not (png_byte) to avoid warnings on systems where (char) is
- * signed and the argument is a (char[])  This macro will fail miserably on
- * systems where (char) is more than 8 bits.
- */
-#define PNG_STRING_FROM_CHUNK(s,c)\
-   (void)(((char*)(s))[0]=(char)(((c)>>24) & 0xff), \
-   ((char*)(s))[1]=(char)(((c)>>16) & 0xff),\
-   ((char*)(s))[2]=(char)(((c)>>8) & 0xff), \
-   ((char*)(s))[3]=(char)((c & 0xff)))
-
-/* Do the same but terminate with a null character. */
-#define PNG_CSTRING_FROM_CHUNK(s,c)\
-   (void)(PNG_STRING_FROM_CHUNK(s,c), ((char*)(s))[4] = 0)
-
-/* Test on flag values as defined in the spec (section 5.4): */
-#define PNG_CHUNK_ANCILLARY(c)   (1 & ((c) >> 29))
-#define PNG_CHUNK_CRITICAL(c)     (!PNG_CHUNK_ANCILLARY(c))
-#define PNG_CHUNK_PRIVATE(c)      (1 & ((c) >> 21))
-#define PNG_CHUNK_RESERVED(c)     (1 & ((c) >> 13))
-#define PNG_CHUNK_SAFE_TO_COPY(c) (1 & ((c) >>  5))
-
-/* Gamma values (new at libpng-1.5.4): */
-#define PNG_GAMMA_MAC_OLD 151724  /* Assume '1.8' is really 2.2/1.45! */
-#define PNG_GAMMA_MAC_INVERSE 65909
-#define PNG_GAMMA_sRGB_INVERSE 45455
-
-/* Almost everything below is C specific; the #defines above can be used in
- * non-C code (so long as it is C-preprocessed) the rest of this stuff cannot.
- */
-#ifndef PNG_VERSION_INFO_ONLY
-
-#include "pngstruct.h"
-#include "pnginfo.h"
-
-/* Validate the include paths - the include path used to generate pnglibconf.h
- * must match that used in the build, or we must be using pnglibconf.h.prebuilt:
- */
-#if PNG_ZLIB_VERNUM != 0 && PNG_ZLIB_VERNUM != ZLIB_VERNUM
-#  error ZLIB_VERNUM != PNG_ZLIB_VERNUM \
-      "-I (include path) error: see the notes in pngpriv.h"
-   /* This means that when pnglibconf.h was built the copy of zlib.h that it
-    * used is not the same as the one being used here.  Because the build of
-    * libpng makes decisions to use inflateInit2 and inflateReset2 based on the
-    * zlib version number and because this affects handling of certain broken
-    * PNG files the -I directives must match.
-    *
-    * The most likely explanation is that you passed a -I in CFLAGS. This will
-    * not work; all the preprocessor directives and in particular all the -I
-    * directives must be in CPPFLAGS.
-    */
-#endif
-
-/* This is used for 16-bit gamma tables -- only the top level pointers are
- * const; this could be changed:
- */
-typedef const png_uint_16p * png_const_uint_16pp;
-
-/* Added to libpng-1.5.7: sRGB conversion tables */
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
-   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
-PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_table, [256]);
-   /* Convert from an sRGB encoded value 0..255 to a 16-bit linear value,
-    * 0..65535.  This table gives the closest 16-bit answers (no errors).
-    */
-#endif
-
-PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_base, [512]);
-PNG_INTERNAL_DATA(const png_byte, png_sRGB_delta, [512]);
-
-#define PNG_sRGB_FROM_LINEAR(linear) \
-  ((png_byte)(0xff & ((png_sRGB_base[(linear)>>15] \
-   + ((((linear) & 0x7fff)*png_sRGB_delta[(linear)>>15])>>12)) >> 8)))
-   /* Given a value 'linear' in the range 0..255*65535 calculate the 8-bit sRGB
-    * encoded value with maximum error 0.646365.  Note that the input is not a
-    * 16-bit value; it has been multiplied by 255! */
-#endif /* SIMPLIFIED_READ/WRITE */
-
-
-/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/* Internal functions; these are not exported from a DLL however because they
- * are used within several of the C source files they have to be C extern.
- *
- * All of these functions must be declared with PNG_INTERNAL_FUNCTION.
- */
-
-/* Zlib support */
-#define PNG_UNEXPECTED_ZLIB_RETURN (-7)
-PNG_INTERNAL_FUNCTION(void, png_zstream_error,(png_structrp png_ptr, int ret),
-   PNG_EMPTY);
-   /* Used by the zlib handling functions to ensure that z_stream::msg is always
-    * set before they return.
-    */
-
-#ifdef PNG_WRITE_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_free_buffer_list,(png_structrp png_ptr,
-   png_compression_bufferp *list),PNG_EMPTY);
-   /* Free the buffer list used by the compressed write code. */
-#endif
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
-   !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \
-   (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \
-   defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \
-   (defined(PNG_sCAL_SUPPORTED) && \
-   defined(PNG_FLOATING_ARITHMETIC_SUPPORTED))
-PNG_INTERNAL_FUNCTION(png_fixed_point,png_fixed,(png_const_structrp png_ptr,
-   double fp, png_const_charp text),PNG_EMPTY);
-#endif
-
-/* Check the user version string for compatibility, returns false if the version
- * numbers aren't compatible.
- */
-PNG_INTERNAL_FUNCTION(int,png_user_version_check,(png_structrp png_ptr,
-   png_const_charp user_png_ver),PNG_EMPTY);
-
-/* Internal base allocator - no messages, NULL on failure to allocate.  This
- * does, however, call the application provided allocator and that could call
- * png_error (although that would be a bug in the application implementation.)
- */
-PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_base,(png_const_structrp png_ptr,
-   png_alloc_size_t size),PNG_ALLOCATED);
-
-#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\
-   defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED)
-/* Internal array allocator, outputs no error or warning messages on failure,
- * just returns NULL.
- */
-PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_array,(png_const_structrp png_ptr,
-   int nelements, size_t element_size),PNG_ALLOCATED);
-
-/* The same but an existing array is extended by add_elements.  This function
- * also memsets the new elements to 0 and copies the old elements.  The old
- * array is not freed or altered.
- */
-PNG_INTERNAL_FUNCTION(png_voidp,png_realloc_array,(png_const_structrp png_ptr,
-   png_const_voidp array, int old_elements, int add_elements,
-   size_t element_size),PNG_ALLOCATED);
-#endif /* text, sPLT or unknown chunks */
-
-/* Magic to create a struct when there is no struct to call the user supplied
- * memory allocators.  Because error handling has not been set up the memory
- * handlers can't safely call png_error, but this is an obscure and undocumented
- * restriction so libpng has to assume that the 'free' handler, at least, might
- * call png_error.
- */
-PNG_INTERNAL_FUNCTION(png_structp,png_create_png_struct,
-   (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-    png_error_ptr warn_fn, png_voidp mem_ptr, png_malloc_ptr malloc_fn,
-    png_free_ptr free_fn),PNG_ALLOCATED);
-
-/* Free memory from internal libpng struct */
-PNG_INTERNAL_FUNCTION(void,png_destroy_png_struct,(png_structrp png_ptr),
-   PNG_EMPTY);
-
-/* Free an allocated jmp_buf (always succeeds) */
-PNG_INTERNAL_FUNCTION(void,png_free_jmpbuf,(png_structrp png_ptr),PNG_EMPTY);
-
-/* Function to allocate memory for zlib.  PNGAPI is disallowed. */
-PNG_INTERNAL_FUNCTION(voidpf,png_zalloc,(voidpf png_ptr, uInt items, uInt size),
-   PNG_ALLOCATED);
-
-/* Function to free memory for zlib.  PNGAPI is disallowed. */
-PNG_INTERNAL_FUNCTION(void,png_zfree,(voidpf png_ptr, voidpf ptr),PNG_EMPTY);
-
-/* Next four functions are used internally as callbacks.  PNGCBAPI is required
- * but not PNG_EXPORT.  PNGAPI added at libpng version 1.2.3, changed to
- * PNGCBAPI at 1.5.0
- */
-
-PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_read_data,(png_structp png_ptr,
-    png_bytep data, size_t length),PNG_EMPTY);
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_push_fill_buffer,(png_structp png_ptr,
-    png_bytep buffer, size_t length),PNG_EMPTY);
-#endif
-
-PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_write_data,(png_structp png_ptr,
-    png_bytep data, size_t length),PNG_EMPTY);
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-#  ifdef PNG_STDIO_SUPPORTED
-PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_flush,(png_structp png_ptr),
-   PNG_EMPTY);
-#  endif
-#endif
-
-/* Reset the CRC variable */
-PNG_INTERNAL_FUNCTION(void,png_reset_crc,(png_structrp png_ptr),PNG_EMPTY);
-
-/* Write the "data" buffer to whatever output you are using */
-PNG_INTERNAL_FUNCTION(void,png_write_data,(png_structrp png_ptr,
-    png_const_bytep data, size_t length),PNG_EMPTY);
-
-/* Read and check the PNG file signature */
-PNG_INTERNAL_FUNCTION(void,png_read_sig,(png_structrp png_ptr,
-   png_inforp info_ptr),PNG_EMPTY);
-
-/* Read the chunk header (length + type name) */
-PNG_INTERNAL_FUNCTION(png_uint_32,png_read_chunk_header,(png_structrp png_ptr),
-   PNG_EMPTY);
-
-/* Read data from whatever input you are using into the "data" buffer */
-PNG_INTERNAL_FUNCTION(void,png_read_data,(png_structrp png_ptr, png_bytep data,
-    size_t length),PNG_EMPTY);
-
-/* Read bytes into buf, and update png_ptr->crc */
-PNG_INTERNAL_FUNCTION(void,png_crc_read,(png_structrp png_ptr, png_bytep buf,
-    png_uint_32 length),PNG_EMPTY);
-
-/* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */
-PNG_INTERNAL_FUNCTION(int,png_crc_finish,(png_structrp png_ptr,
-   png_uint_32 skip),PNG_EMPTY);
-
-/* Read the CRC from the file and compare it to the libpng calculated CRC */
-PNG_INTERNAL_FUNCTION(int,png_crc_error,(png_structrp png_ptr),PNG_EMPTY);
-
-/* Calculate the CRC over a section of data.  Note that we are only
- * passing a maximum of 64K on systems that have this as a memory limit,
- * since this is the maximum buffer size we can specify.
- */
-PNG_INTERNAL_FUNCTION(void,png_calculate_crc,(png_structrp png_ptr,
-   png_const_bytep ptr, size_t length),PNG_EMPTY);
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_flush,(png_structrp png_ptr),PNG_EMPTY);
-#endif
-
-/* Write various chunks */
-
-/* Write the IHDR chunk, and update the png_struct with the necessary
- * information.
- */
-PNG_INTERNAL_FUNCTION(void,png_write_IHDR,(png_structrp png_ptr,
-   png_uint_32 width, png_uint_32 height, int bit_depth, int color_type,
-   int compression_method, int filter_method, int interlace_method),PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_write_PLTE,(png_structrp png_ptr,
-   png_const_colorp palette, png_uint_32 num_pal),PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_compress_IDAT,(png_structrp png_ptr,
-   png_const_bytep row_data, png_alloc_size_t row_data_length, int flush),
-   PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_write_IEND,(png_structrp png_ptr),PNG_EMPTY);
-
-#ifdef PNG_WRITE_gAMA_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_gAMA_fixed,(png_structrp png_ptr,
-    png_fixed_point file_gamma),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_sBIT,(png_structrp png_ptr,
-    png_const_color_8p sbit, int color_type),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_cHRM_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_cHRM_fixed,(png_structrp png_ptr,
-    const png_xy *xy), PNG_EMPTY);
-   /* The xy value must have been previously validated */
-#endif
-
-#ifdef PNG_WRITE_sRGB_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_sRGB,(png_structrp png_ptr,
-    int intent),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_eXIf_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_eXIf,(png_structrp png_ptr,
-    png_bytep exif, int num_exif),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_iCCP_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_iCCP,(png_structrp png_ptr,
-   png_const_charp name, png_const_bytep profile), PNG_EMPTY);
-   /* The profile must have been previously validated for correctness, the
-    * length comes from the first four bytes.  Only the base, deflate,
-    * compression is supported.
-    */
-#endif
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_sPLT,(png_structrp png_ptr,
-    png_const_sPLT_tp palette),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_tRNS,(png_structrp png_ptr,
-    png_const_bytep trans, png_const_color_16p values, int number,
-    int color_type),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_bKGD,(png_structrp png_ptr,
-    png_const_color_16p values, int color_type),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_hIST,(png_structrp png_ptr,
-    png_const_uint_16p hist, int num_hist),PNG_EMPTY);
-#endif
-
-/* Chunks that have keywords */
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_tEXt,(png_structrp png_ptr,
-   png_const_charp key, png_const_charp text, size_t text_len),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_zTXt,(png_structrp png_ptr, png_const_charp
-    key, png_const_charp text, int compression),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_iTXt,(png_structrp png_ptr,
-    int compression, png_const_charp key, png_const_charp lang,
-    png_const_charp lang_key, png_const_charp text),PNG_EMPTY);
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED  /* Added at version 1.0.14 and 1.2.4 */
-PNG_INTERNAL_FUNCTION(int,png_set_text_2,(png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_textp text_ptr, int num_text),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_oFFs,(png_structrp png_ptr,
-    png_int_32 x_offset, png_int_32 y_offset, int unit_type),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_pCAL,(png_structrp png_ptr,
-    png_charp purpose, png_int_32 X0, png_int_32 X1, int type, int nparams,
-    png_const_charp units, png_charpp params),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_pHYs,(png_structrp png_ptr,
-    png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit,
-    int unit_type),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_tIME,(png_structrp png_ptr,
-    png_const_timep mod_time),PNG_EMPTY);
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_write_sCAL_s,(png_structrp png_ptr,
-    int unit, png_const_charp width, png_const_charp height),PNG_EMPTY);
-#endif
-
-/* Called when finished processing a row of data */
-PNG_INTERNAL_FUNCTION(void,png_write_finish_row,(png_structrp png_ptr),
-    PNG_EMPTY);
-
-/* Internal use only.   Called before first row of data */
-PNG_INTERNAL_FUNCTION(void,png_write_start_row,(png_structrp png_ptr),
-    PNG_EMPTY);
-
-/* Combine a row of data, dealing with alpha, etc. if requested.  'row' is an
- * array of png_ptr->width pixels.  If the image is not interlaced or this
- * is the final pass this just does a memcpy, otherwise the "display" flag
- * is used to determine whether to copy pixels that are not in the current pass.
- *
- * Because 'png_do_read_interlace' (below) replicates pixels this allows this
- * function to achieve the documented 'blocky' appearance during interlaced read
- * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row'
- * are not changed if they are not in the current pass, when display is 0.
- *
- * 'display' must be 0 or 1, otherwise the memcpy will be done regardless.
- *
- * The API always reads from the png_struct row buffer and always assumes that
- * it is full width (png_do_read_interlace has already been called.)
- *
- * This function is only ever used to write to row buffers provided by the
- * caller of the relevant libpng API and the row must have already been
- * transformed by the read transformations.
- *
- * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed
- * bitmasks for use within the code, otherwise runtime generated masks are used.
- * The default is compile time masks.
- */
-#ifndef PNG_USE_COMPILE_TIME_MASKS
-#  define PNG_USE_COMPILE_TIME_MASKS 1
-#endif
-PNG_INTERNAL_FUNCTION(void,png_combine_row,(png_const_structrp png_ptr,
-    png_bytep row, int display),PNG_EMPTY);
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-/* Expand an interlaced row: the 'row_info' describes the pass data that has
- * been read in and must correspond to the pixels in 'row', the pixels are
- * expanded (moved apart) in 'row' to match the final layout, when doing this
- * the pixels are *replicated* to the intervening space.  This is essential for
- * the correct operation of png_combine_row, above.
- */
-PNG_INTERNAL_FUNCTION(void,png_do_read_interlace,(png_row_infop row_info,
-    png_bytep row, int pass, png_uint_32 transformations),PNG_EMPTY);
-#endif
-
-/* GRR TO DO (2.0 or whenever):  simplify other internal calling interfaces */
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-/* Grab pixels out of a row for an interlaced pass */
-PNG_INTERNAL_FUNCTION(void,png_do_write_interlace,(png_row_infop row_info,
-    png_bytep row, int pass),PNG_EMPTY);
-#endif
-
-/* Unfilter a row: check the filter value before calling this, there is no point
- * calling it for PNG_FILTER_VALUE_NONE.
- */
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row,(png_structrp pp, png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row, int filter),PNG_EMPTY);
-
-#if PNG_ARM_NEON_OPT > 0
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_neon,(png_row_infop row_info,
-    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-#if PNG_MIPS_MSA_IMPLEMENTATION == 1
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_msa,(png_row_infop row_info,
-    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-#if PNG_MIPS_MMI_IMPLEMENTATION > 0
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info,
-    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-#if PNG_POWERPC_VSX_OPT > 0
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info,
-    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-#if PNG_INTEL_SSE_IMPLEMENTATION > 0
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop
-    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-#endif
-
-/* Choose the best filter to use and filter the row data */
-PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
-    png_row_infop row_info),PNG_EMPTY);
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr,
-   png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY);
-   /* Read 'avail_out' bytes of data from the IDAT stream.  If the output buffer
-    * is NULL the function checks, instead, for the end of the stream.  In this
-    * case a benign error will be issued if the stream end is not found or if
-    * extra data has to be consumed.
-    */
-PNG_INTERNAL_FUNCTION(void,png_read_finish_IDAT,(png_structrp png_ptr),
-   PNG_EMPTY);
-   /* This cleans up when the IDAT LZ stream does not end when the last image
-    * byte is read; there is still some pending input.
-    */
-
-PNG_INTERNAL_FUNCTION(void,png_read_finish_row,(png_structrp png_ptr),
-   PNG_EMPTY);
-   /* Finish a row while reading, dealing with interlacing passes, etc. */
-#endif /* SEQUENTIAL_READ */
-
-/* Initialize the row buffers, etc. */
-PNG_INTERNAL_FUNCTION(void,png_read_start_row,(png_structrp png_ptr),PNG_EMPTY);
-
-#if ZLIB_VERNUM >= 0x1240
-PNG_INTERNAL_FUNCTION(int,png_zlib_inflate,(png_structrp png_ptr, int flush),
-      PNG_EMPTY);
-#  define PNG_INFLATE(pp, flush) png_zlib_inflate(pp, flush)
-#else /* Zlib < 1.2.4 */
-#  define PNG_INFLATE(pp, flush) inflate(&(pp)->zstream, flush)
-#endif /* Zlib < 1.2.4 */
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-/* Optional call to update the users info structure */
-PNG_INTERNAL_FUNCTION(void,png_read_transform_info,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-#endif
-
-/* Shared transform functions, defined in pngtran.c */
-#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
-    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_strip_channel,(png_row_infop row_info,
-    png_bytep row, int at_start),PNG_EMPTY);
-#endif
-
-#ifdef PNG_16BIT_SUPPORTED
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_swap,(png_row_infop row_info,
-    png_bytep row),PNG_EMPTY);
-#endif
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
-    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_packswap,(png_row_infop row_info,
-    png_bytep row),PNG_EMPTY);
-#endif
-
-#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_invert,(png_row_infop row_info,
-    png_bytep row),PNG_EMPTY);
-#endif
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_bgr,(png_row_infop row_info,
-    png_bytep row),PNG_EMPTY);
-#endif
-
-/* The following decodes the appropriate chunks, and does error correction,
- * then calls the appropriate callback for the chunk if it is valid.
- */
-
-/* Decode the IHDR chunk */
-PNG_INTERNAL_FUNCTION(void,png_handle_IHDR,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_handle_PLTE,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_handle_IEND,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_bKGD,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_cHRM,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_eXIf_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_eXIf,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_gAMA,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_hIST,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_iCCP,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif /* READ_iCCP */
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_iTXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_oFFs,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_pCAL,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_pHYs,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_sBIT,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_sCAL,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_sPLT,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif /* READ_sPLT */
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_sRGB,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_tEXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_tIME,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_tRNS,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_handle_zTXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-#endif
-
-PNG_INTERNAL_FUNCTION(void,png_check_chunk_name,(png_const_structrp png_ptr,
-    png_uint_32 chunk_name),PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_check_chunk_length,(png_const_structrp png_ptr,
-    png_uint_32 chunk_length),PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_handle_unknown,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length, int keep),PNG_EMPTY);
-   /* This is the function that gets called for unknown chunks.  The 'keep'
-    * argument is either non-zero for a known chunk that has been set to be
-    * handled as unknown or zero for an unknown chunk.  By default the function
-    * just skips the chunk or errors out if it is critical.
-    */
-
-#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\
-    defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
-PNG_INTERNAL_FUNCTION(int,png_chunk_unknown_handling,
-    (png_const_structrp png_ptr, png_uint_32 chunk_name),PNG_EMPTY);
-   /* Exactly as the API png_handle_as_unknown() except that the argument is a
-    * 32-bit chunk name, not a string.
-    */
-#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */
-
-/* Handle the transformations for reading and writing */
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_do_read_transformations,(png_structrp png_ptr,
-   png_row_infop row_info),PNG_EMPTY);
-#endif
-#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_do_write_transformations,(png_structrp png_ptr,
-   png_row_infop row_info),PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_init_read_transformations,(png_structrp png_ptr),
-    PNG_EMPTY);
-#endif
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_push_read_chunk,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_sig,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_check_crc,(png_structrp png_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_save_buffer,(png_structrp png_ptr),
-    PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_restore_buffer,(png_structrp png_ptr,
-    png_bytep buffer, size_t buffer_length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_IDAT,(png_structrp png_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_process_IDAT_data,(png_structrp png_ptr,
-    png_bytep buffer, size_t buffer_length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_process_row,(png_structrp png_ptr),
-    PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_handle_unknown,(png_structrp png_ptr,
-   png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_have_info,(png_structrp png_ptr,
-   png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_have_end,(png_structrp png_ptr,
-   png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_have_row,(png_structrp png_ptr,
-    png_bytep row),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_end,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_process_some_data,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_read_push_finish_row,(png_structrp png_ptr),
-    PNG_EMPTY);
-#  ifdef PNG_READ_tEXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_push_handle_tEXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_tEXt,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-#  endif
-#  ifdef PNG_READ_zTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_push_handle_zTXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_zTXt,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-#  endif
-#  ifdef PNG_READ_iTXt_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_push_handle_iTXt,(png_structrp png_ptr,
-    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_push_read_iTXt,(png_structrp png_ptr,
-    png_inforp info_ptr),PNG_EMPTY);
-#  endif
-
-#endif /* PROGRESSIVE_READ */
-
-/* Added at libpng version 1.6.0 */
-#ifdef PNG_GAMMA_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_colorspace_set_gamma,(png_const_structrp png_ptr,
-    png_colorspacerp colorspace, png_fixed_point gAMA), PNG_EMPTY);
-   /* Set the colorspace gamma with a value provided by the application or by
-    * the gAMA chunk on read.  The value will override anything set by an ICC
-    * profile.
-    */
-
-PNG_INTERNAL_FUNCTION(void,png_colorspace_sync_info,(png_const_structrp png_ptr,
-    png_inforp info_ptr), PNG_EMPTY);
-   /* Synchronize the info 'valid' flags with the colorspace */
-
-PNG_INTERNAL_FUNCTION(void,png_colorspace_sync,(png_const_structrp png_ptr,
-    png_inforp info_ptr), PNG_EMPTY);
-   /* Copy the png_struct colorspace to the info_struct and call the above to
-    * synchronize the flags.  Checks for NULL info_ptr and does nothing.
-    */
-#endif
-
-/* Added at libpng version 1.4.0 */
-#ifdef PNG_COLORSPACE_SUPPORTED
-/* These internal functions are for maintaining the colorspace structure within
- * a png_info or png_struct (or, indeed, both).
- */
-PNG_INTERNAL_FUNCTION(int,png_colorspace_set_chromaticities,
-   (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_xy *xy,
-    int preferred), PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(int,png_colorspace_set_endpoints,
-   (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_XYZ *XYZ,
-    int preferred), PNG_EMPTY);
-
-#ifdef PNG_sRGB_SUPPORTED
-PNG_INTERNAL_FUNCTION(int,png_colorspace_set_sRGB,(png_const_structrp png_ptr,
-   png_colorspacerp colorspace, int intent), PNG_EMPTY);
-   /* This does set the colorspace gAMA and cHRM values too, but doesn't set the
-    * flags to write them, if it returns false there was a problem and an error
-    * message has already been output (but the colorspace may still need to be
-    * synced to record the invalid flag).
-    */
-#endif /* sRGB */
-
-#ifdef PNG_iCCP_SUPPORTED
-PNG_INTERNAL_FUNCTION(int,png_colorspace_set_ICC,(png_const_structrp png_ptr,
-   png_colorspacerp colorspace, png_const_charp name,
-   png_uint_32 profile_length, png_const_bytep profile, int color_type),
-   PNG_EMPTY);
-   /* The 'name' is used for information only */
-
-/* Routines for checking parts of an ICC profile. */
-#ifdef PNG_READ_iCCP_SUPPORTED
-PNG_INTERNAL_FUNCTION(int,png_icc_check_length,(png_const_structrp png_ptr,
-   png_colorspacerp colorspace, png_const_charp name,
-   png_uint_32 profile_length), PNG_EMPTY);
-#endif /* READ_iCCP */
-PNG_INTERNAL_FUNCTION(int,png_icc_check_header,(png_const_structrp png_ptr,
-   png_colorspacerp colorspace, png_const_charp name,
-   png_uint_32 profile_length,
-   png_const_bytep profile /* first 132 bytes only */, int color_type),
-   PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(int,png_icc_check_tag_table,(png_const_structrp png_ptr,
-   png_colorspacerp colorspace, png_const_charp name,
-   png_uint_32 profile_length,
-   png_const_bytep profile /* header plus whole tag table */), PNG_EMPTY);
-#ifdef PNG_sRGB_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_icc_set_sRGB,(
-   png_const_structrp png_ptr, png_colorspacerp colorspace,
-   png_const_bytep profile, uLong adler), PNG_EMPTY);
-   /* 'adler' is the Adler32 checksum of the uncompressed profile data. It may
-    * be zero to indicate that it is not available.  It is used, if provided,
-    * as a fast check on the profile when checking to see if it is sRGB.
-    */
-#endif
-#endif /* iCCP */
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_colorspace_set_rgb_coefficients,
-   (png_structrp png_ptr), PNG_EMPTY);
-   /* Set the rgb_to_gray coefficients from the colorspace Y values */
-#endif /* READ_RGB_TO_GRAY */
-#endif /* COLORSPACE */
-
-/* Added at libpng version 1.4.0 */
-PNG_INTERNAL_FUNCTION(void,png_check_IHDR,(png_const_structrp png_ptr,
-    png_uint_32 width, png_uint_32 height, int bit_depth,
-    int color_type, int interlace_type, int compression_type,
-    int filter_type),PNG_EMPTY);
-
-/* Added at libpng version 1.5.10 */
-#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
-    defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_do_check_palette_indexes,
-   (png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY);
-#endif
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)
-PNG_INTERNAL_FUNCTION(void,png_fixed_error,(png_const_structrp png_ptr,
-   png_const_charp name),PNG_NORETURN);
-#endif
-
-/* Puts 'string' into 'buffer' at buffer[pos], taking care never to overwrite
- * the end.  Always leaves the buffer nul terminated.  Never errors out (and
- * there is no error code.)
- */
-PNG_INTERNAL_FUNCTION(size_t,png_safecat,(png_charp buffer, size_t bufsize,
-   size_t pos, png_const_charp string),PNG_EMPTY);
-
-/* Various internal functions to handle formatted warning messages, currently
- * only implemented for warnings.
- */
-#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED)
-/* Utility to dump an unsigned value into a buffer, given a start pointer and
- * and end pointer (which should point just *beyond* the end of the buffer!)
- * Returns the pointer to the start of the formatted string.  This utility only
- * does unsigned values.
- */
-PNG_INTERNAL_FUNCTION(png_charp,png_format_number,(png_const_charp start,
-   png_charp end, int format, png_alloc_size_t number),PNG_EMPTY);
-
-/* Convenience macro that takes an array: */
-#define PNG_FORMAT_NUMBER(buffer,format,number) \
-   png_format_number(buffer, buffer + (sizeof buffer), format, number)
-
-/* Suggested size for a number buffer (enough for 64 bits and a sign!) */
-#define PNG_NUMBER_BUFFER_SIZE 24
-
-/* These are the integer formats currently supported, the name is formed from
- * the standard printf(3) format string.
- */
-#define PNG_NUMBER_FORMAT_u     1 /* chose unsigned API! */
-#define PNG_NUMBER_FORMAT_02u   2
-#define PNG_NUMBER_FORMAT_d     1 /* chose signed API! */
-#define PNG_NUMBER_FORMAT_02d   2
-#define PNG_NUMBER_FORMAT_x     3
-#define PNG_NUMBER_FORMAT_02x   4
-#define PNG_NUMBER_FORMAT_fixed 5 /* choose the signed API */
-#endif
-
-#ifdef PNG_WARNINGS_SUPPORTED
-/* New defines and members adding in libpng-1.5.4 */
-#  define PNG_WARNING_PARAMETER_SIZE 32
-#  define PNG_WARNING_PARAMETER_COUNT 8 /* Maximum 9; see pngerror.c */
-
-/* An l-value of this type has to be passed to the APIs below to cache the
- * values of the parameters to a formatted warning message.
- */
-typedef char png_warning_parameters[PNG_WARNING_PARAMETER_COUNT][
-   PNG_WARNING_PARAMETER_SIZE];
-
-PNG_INTERNAL_FUNCTION(void,png_warning_parameter,(png_warning_parameters p,
-   int number, png_const_charp string),PNG_EMPTY);
-   /* Parameters are limited in size to PNG_WARNING_PARAMETER_SIZE characters,
-    * including the trailing '\0'.
-    */
-PNG_INTERNAL_FUNCTION(void,png_warning_parameter_unsigned,
-   (png_warning_parameters p, int number, int format, png_alloc_size_t value),
-   PNG_EMPTY);
-   /* Use png_alloc_size_t because it is an unsigned type as big as any we
-    * need to output.  Use the following for a signed value.
-    */
-PNG_INTERNAL_FUNCTION(void,png_warning_parameter_signed,
-   (png_warning_parameters p, int number, int format, png_int_32 value),
-   PNG_EMPTY);
-
-PNG_INTERNAL_FUNCTION(void,png_formatted_warning,(png_const_structrp png_ptr,
-   png_warning_parameters p, png_const_charp message),PNG_EMPTY);
-   /* 'message' follows the X/Open approach of using @1, @2 to insert
-    * parameters previously supplied using the above functions.  Errors in
-    * specifying the parameters will simply result in garbage substitutions.
-    */
-#endif
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-/* Application errors (new in 1.6); use these functions (declared below) for
- * errors in the parameters or order of API function calls on read.  The
- * 'warning' should be used for an error that can be handled completely; the
- * 'error' for one which can be handled safely but which may lose application
- * information or settings.
- *
- * By default these both result in a png_error call prior to release, while in a
- * released version the 'warning' is just a warning.  However if the application
- * explicitly disables benign errors (explicitly permitting the code to lose
- * information) they both turn into warnings.
- *
- * If benign errors aren't supported they end up as the corresponding base call
- * (png_warning or png_error.)
- */
-PNG_INTERNAL_FUNCTION(void,png_app_warning,(png_const_structrp png_ptr,
-   png_const_charp message),PNG_EMPTY);
-   /* The application provided invalid parameters to an API function or called
-    * an API function at the wrong time, libpng can completely recover.
-    */
-
-PNG_INTERNAL_FUNCTION(void,png_app_error,(png_const_structrp png_ptr,
-   png_const_charp message),PNG_EMPTY);
-   /* As above but libpng will ignore the call, or attempt some other partial
-    * recovery from the error.
-    */
-#else
-#  define png_app_warning(pp,s) png_warning(pp,s)
-#  define png_app_error(pp,s) png_error(pp,s)
-#endif
-
-PNG_INTERNAL_FUNCTION(void,png_chunk_report,(png_const_structrp png_ptr,
-   png_const_charp message, int error),PNG_EMPTY);
-   /* Report a recoverable issue in chunk data.  On read this is used to report
-    * a problem found while reading a particular chunk and the
-    * png_chunk_benign_error or png_chunk_warning function is used as
-    * appropriate.  On write this is used to report an error that comes from
-    * data set via an application call to a png_set_ API and png_app_error or
-    * png_app_warning is used as appropriate.
-    *
-    * The 'error' parameter must have one of the following values:
-    */
-#define PNG_CHUNK_WARNING     0 /* never an error */
-#define PNG_CHUNK_WRITE_ERROR 1 /* an error only on write */
-#define PNG_CHUNK_ERROR       2 /* always an error */
-
-/* ASCII to FP interfaces, currently only implemented if sCAL
- * support is required.
- */
-#if defined(PNG_sCAL_SUPPORTED)
-/* MAX_DIGITS is actually the maximum number of characters in an sCAL
- * width or height, derived from the precision (number of significant
- * digits - a build time settable option) and assumptions about the
- * maximum ridiculous exponent.
- */
-#define PNG_sCAL_MAX_DIGITS (PNG_sCAL_PRECISION+1/*.*/+1/*E*/+10/*exponent*/)
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_ascii_from_fp,(png_const_structrp png_ptr,
-   png_charp ascii, size_t size, double fp, unsigned int precision),
-   PNG_EMPTY);
-#endif /* FLOATING_POINT */
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-PNG_INTERNAL_FUNCTION(void,png_ascii_from_fixed,(png_const_structrp png_ptr,
-   png_charp ascii, size_t size, png_fixed_point fp),PNG_EMPTY);
-#endif /* FIXED_POINT */
-#endif /* sCAL */
-
-#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
-/* An internal API to validate the format of a floating point number.
- * The result is the index of the next character.  If the number is
- * not valid it will be the index of a character in the supposed number.
- *
- * The format of a number is defined in the PNG extensions specification
- * and this API is strictly conformant to that spec, not anyone elses!
- *
- * The format as a regular expression is:
- *
- * [+-]?[0-9]+.?([Ee][+-]?[0-9]+)?
- *
- * or:
- *
- * [+-]?.[0-9]+(.[0-9]+)?([Ee][+-]?[0-9]+)?
- *
- * The complexity is that either integer or fraction must be present and the
- * fraction is permitted to have no digits only if the integer is present.
- *
- * NOTE: The dangling E problem.
- *   There is a PNG valid floating point number in the following:
- *
- *       PNG floating point numbers are not greedy.
- *
- *   Working this out requires *TWO* character lookahead (because of the
- *   sign), the parser does not do this - it will fail at the 'r' - this
- *   doesn't matter for PNG sCAL chunk values, but it requires more care
- *   if the value were ever to be embedded in something more complex.  Use
- *   ANSI-C strtod if you need the lookahead.
- */
-/* State table for the parser. */
-#define PNG_FP_INTEGER    0  /* before or in integer */
-#define PNG_FP_FRACTION   1  /* before or in fraction */
-#define PNG_FP_EXPONENT   2  /* before or in exponent */
-#define PNG_FP_STATE      3  /* mask for the above */
-#define PNG_FP_SAW_SIGN   4  /* Saw +/- in current state */
-#define PNG_FP_SAW_DIGIT  8  /* Saw a digit in current state */
-#define PNG_FP_SAW_DOT   16  /* Saw a dot in current state */
-#define PNG_FP_SAW_E     32  /* Saw an E (or e) in current state */
-#define PNG_FP_SAW_ANY   60  /* Saw any of the above 4 */
-
-/* These three values don't affect the parser.  They are set but not used.
- */
-#define PNG_FP_WAS_VALID 64  /* Preceding substring is a valid fp number */
-#define PNG_FP_NEGATIVE 128  /* A negative number, including "-0" */
-#define PNG_FP_NONZERO  256  /* A non-zero value */
-#define PNG_FP_STICKY   448  /* The above three flags */
-
-/* This is available for the caller to store in 'state' if required.  Do not
- * call the parser after setting it (the parser sometimes clears it.)
- */
-#define PNG_FP_INVALID  512  /* Available for callers as a distinct value */
-
-/* Result codes for the parser (boolean - true means ok, false means
- * not ok yet.)
- */
-#define PNG_FP_MAYBE      0  /* The number may be valid in the future */
-#define PNG_FP_OK         1  /* The number is valid */
-
-/* Tests on the sticky non-zero and negative flags.  To pass these checks
- * the state must also indicate that the whole number is valid - this is
- * achieved by testing PNG_FP_SAW_DIGIT (see the implementation for why this
- * is equivalent to PNG_FP_OK above.)
- */
-#define PNG_FP_NZ_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NEGATIVE | PNG_FP_NONZERO)
-   /* NZ_MASK: the string is valid and a non-zero negative value */
-#define PNG_FP_Z_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NONZERO)
-   /* Z MASK: the string is valid and a non-zero value. */
-   /* PNG_FP_SAW_DIGIT: the string is valid. */
-#define PNG_FP_IS_ZERO(state) (((state) & PNG_FP_Z_MASK) == PNG_FP_SAW_DIGIT)
-#define PNG_FP_IS_POSITIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_Z_MASK)
-#define PNG_FP_IS_NEGATIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_NZ_MASK)
-
-/* The actual parser.  This can be called repeatedly. It updates
- * the index into the string and the state variable (which must
- * be initialized to 0).  It returns a result code, as above.  There
- * is no point calling the parser any more if it fails to advance to
- * the end of the string - it is stuck on an invalid character (or
- * terminated by '\0').
- *
- * Note that the pointer will consume an E or even an E+ and then leave
- * a 'maybe' state even though a preceding integer.fraction is valid.
- * The PNG_FP_WAS_VALID flag indicates that a preceding substring was
- * a valid number.  It's possible to recover from this by calling
- * the parser again (from the start, with state 0) but with a string
- * that omits the last character (i.e. set the size to the index of
- * the problem character.)  This has not been tested within libpng.
- */
-PNG_INTERNAL_FUNCTION(int,png_check_fp_number,(png_const_charp string,
-   size_t size, int *statep, size_t *whereami),PNG_EMPTY);
-
-/* This is the same but it checks a complete string and returns true
- * only if it just contains a floating point number.  As of 1.5.4 this
- * function also returns the state at the end of parsing the number if
- * it was valid (otherwise it returns 0.)  This can be used for testing
- * for negative or zero values using the sticky flag.
- */
-PNG_INTERNAL_FUNCTION(int,png_check_fp_string,(png_const_charp string,
-   size_t size),PNG_EMPTY);
-#endif /* pCAL || sCAL */
-
-#if defined(PNG_GAMMA_SUPPORTED) ||\
-    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED)
-/* Added at libpng version 1.5.0 */
-/* This is a utility to provide a*times/div (rounded) and indicate
- * if there is an overflow.  The result is a boolean - false (0)
- * for overflow, true (1) if no overflow, in which case *res
- * holds the result.
- */
-PNG_INTERNAL_FUNCTION(int,png_muldiv,(png_fixed_point_p res, png_fixed_point a,
-   png_int_32 multiplied_by, png_int_32 divided_by),PNG_EMPTY);
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-/* Same deal, but issue a warning on overflow and return 0. */
-PNG_INTERNAL_FUNCTION(png_fixed_point,png_muldiv_warn,
-   (png_const_structrp png_ptr, png_fixed_point a, png_int_32 multiplied_by,
-   png_int_32 divided_by),PNG_EMPTY);
-#endif
-
-#ifdef PNG_GAMMA_SUPPORTED
-/* Calculate a reciprocal - used for gamma values.  This returns
- * 0 if the argument is 0 in order to maintain an undefined value;
- * there are no warnings.
- */
-PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal,(png_fixed_point a),
-   PNG_EMPTY);
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-/* The same but gives a reciprocal of the product of two fixed point
- * values.  Accuracy is suitable for gamma calculations but this is
- * not exact - use png_muldiv for that.  Only required at present on read.
- */
-PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal2,(png_fixed_point a,
-   png_fixed_point b),PNG_EMPTY);
-#endif
-
-/* Return true if the gamma value is significantly different from 1.0 */
-PNG_INTERNAL_FUNCTION(int,png_gamma_significant,(png_fixed_point gamma_value),
-   PNG_EMPTY);
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-/* Internal fixed point gamma correction.  These APIs are called as
- * required to convert single values - they don't need to be fast,
- * they are not used when processing image pixel values.
- *
- * While the input is an 'unsigned' value it must actually be the
- * correct bit value - 0..255 or 0..65535 as required.
- */
-PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_correct,(png_structrp png_ptr,
-   unsigned int value, png_fixed_point gamma_value),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_16bit_correct,(unsigned int value,
-   png_fixed_point gamma_value),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(png_byte,png_gamma_8bit_correct,(unsigned int value,
-   png_fixed_point gamma_value),PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_destroy_gamma_table,(png_structrp png_ptr),
-   PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(void,png_build_gamma_table,(png_structrp png_ptr,
-   int bit_depth),PNG_EMPTY);
-#endif
-
-/* SIMPLIFIED READ/WRITE SUPPORT */
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
-   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
-/* The internal structure that png_image::opaque points to. */
-typedef struct png_control
-{
-   png_structp png_ptr;
-   png_infop   info_ptr;
-   png_voidp   error_buf;           /* Always a jmp_buf at present. */
-
-   png_const_bytep memory;          /* Memory buffer. */
-   size_t          size;            /* Size of the memory buffer. */
-
-   unsigned int for_write       :1; /* Otherwise it is a read structure */
-   unsigned int owned_file      :1; /* We own the file in io_ptr */
-} png_control;
-
-/* Return the pointer to the jmp_buf from a png_control: necessary because C
- * does not reveal the type of the elements of jmp_buf.
- */
-#ifdef __cplusplus
-#  define png_control_jmp_buf(pc) (((jmp_buf*)((pc)->error_buf))[0])
-#else
-#  define png_control_jmp_buf(pc) ((pc)->error_buf)
-#endif
-
-/* Utility to safely execute a piece of libpng code catching and logging any
- * errors that might occur.  Returns true on success, false on failure (either
- * of the function or as a result of a png_error.)
- */
-PNG_INTERNAL_CALLBACK(void,png_safe_error,(png_structp png_ptr,
-   png_const_charp error_message),PNG_NORETURN);
-
-#ifdef PNG_WARNINGS_SUPPORTED
-PNG_INTERNAL_CALLBACK(void,png_safe_warning,(png_structp png_ptr,
-   png_const_charp warning_message),PNG_EMPTY);
-#else
-#  define png_safe_warning 0/*dummy argument*/
-#endif
-
-PNG_INTERNAL_FUNCTION(int,png_safe_execute,(png_imagep image,
-   int (*function)(png_voidp), png_voidp arg),PNG_EMPTY);
-
-/* Utility to log an error; this also cleans up the png_image; the function
- * always returns 0 (false).
- */
-PNG_INTERNAL_FUNCTION(int,png_image_error,(png_imagep image,
-   png_const_charp error_message),PNG_EMPTY);
-
-#ifndef PNG_SIMPLIFIED_READ_SUPPORTED
-/* png_image_free is used by the write code but not exported */
-PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
-#endif /* !SIMPLIFIED_READ */
-
-#endif /* SIMPLIFIED READ/WRITE */
-
-/* These are initialization functions for hardware specific PNG filter
- * optimizations; list these here then select the appropriate one at compile
- * time using the macro PNG_FILTER_OPTIMIZATIONS.  If the macro is not defined
- * the generic code is used.
- */
-#ifdef PNG_FILTER_OPTIMIZATIONS
-PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr,
-   unsigned int bpp), PNG_EMPTY);
-   /* Just declare the optimization that will be used */
-#else
-   /* List *all* the possible optimizations here - this branch is required if
-    * the builder of libpng passes the definition of PNG_FILTER_OPTIMIZATIONS in
-    * CFLAGS in place of CPPFLAGS *and* uses symbol prefixing.
-    */
-#  if PNG_ARM_NEON_OPT > 0
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon,
-   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
-#endif
-
-#if PNG_MIPS_MSA_IMPLEMENTATION == 1
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
-   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
-#endif
-
-#  if PNG_MIPS_MMI_IMPLEMENTATION > 0
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
-   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
-#  endif
-
-#  if PNG_INTEL_SSE_IMPLEMENTATION > 0
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
-   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
-#  endif
-#endif
-
-#if PNG_LOONGARCH_LSX_OPT > 0
-PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx,
-    (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
-#endif
-
-PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
-   png_const_charp key, png_bytep new_key), PNG_EMPTY);
-
-#if PNG_ARM_NEON_IMPLEMENTATION == 1
-PNG_INTERNAL_FUNCTION(void,
-                      png_riffle_palette_neon,
-                      (png_structrp),
-                      PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(int,
-                      png_do_expand_palette_rgba8_neon,
-                      (png_structrp,
-                       png_row_infop,
-                       png_const_bytep,
-                       const png_bytepp,
-                       const png_bytepp),
-                      PNG_EMPTY);
-PNG_INTERNAL_FUNCTION(int,
-                      png_do_expand_palette_rgb8_neon,
-                      (png_structrp,
-                       png_row_infop,
-                       png_const_bytep,
-                       const png_bytepp,
-                       const png_bytepp),
-                      PNG_EMPTY);
-#endif
-
-/* Maintainer: Put new private prototypes here ^ */
-
-#include "pngdebug.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* PNG_VERSION_INFO_ONLY */
-#endif /* PNGPRIV_H */
diff --git a/dep/libpng/src/pngread.c b/dep/libpng/src/pngread.c
deleted file mode 100644
index 07a39df6e..000000000
--- a/dep/libpng/src/pngread.c
+++ /dev/null
@@ -1,4228 +0,0 @@
-
-/* pngread.c - read a PNG file
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file contains routines that an application calls directly to
- * read a PNG file or stream.
- */
-
-#include "pngpriv.h"
-#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) && defined(PNG_STDIO_SUPPORTED)
-#  include <errno.h>
-#endif
-
-#ifdef PNG_READ_SUPPORTED
-
-/* Create a PNG structure for reading, and allocate any memory needed. */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_read_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
-{
-#ifndef PNG_USER_MEM_SUPPORTED
-   png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
-        error_fn, warn_fn, NULL, NULL, NULL);
-#else
-   return png_create_read_struct_2(user_png_ver, error_ptr, error_fn,
-        warn_fn, NULL, NULL, NULL);
-}
-
-/* Alternate create PNG structure for reading, and allocate any memory
- * needed.
- */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
-{
-   png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
-       error_fn, warn_fn, mem_ptr, malloc_fn, free_fn);
-#endif /* USER_MEM */
-
-   if (png_ptr != NULL)
-   {
-      png_ptr->mode = PNG_IS_READ_STRUCT;
-
-      /* Added in libpng-1.6.0; this can be used to detect a read structure if
-       * required (it will be zero in a write structure.)
-       */
-#     ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-         png_ptr->IDAT_read_size = PNG_IDAT_READ_SIZE;
-#     endif
-
-#     ifdef PNG_BENIGN_READ_ERRORS_SUPPORTED
-         png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN;
-
-         /* In stable builds only warn if an application error can be completely
-          * handled.
-          */
-#        if PNG_RELEASE_BUILD
-            png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN;
-#        endif
-#     endif
-
-      /* TODO: delay this, it can be done in png_init_io (if the app doesn't
-       * do it itself) avoiding setting the default function if it is not
-       * required.
-       */
-      png_set_read_fn(png_ptr, NULL, NULL);
-   }
-
-   return png_ptr;
-}
-
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the information before the actual image data.  This has been
- * changed in v0.90 to allow reading a file that already has the magic
- * bytes read from the stream.  You can tell libpng how many bytes have
- * been read from the beginning of the stream (up to the maximum of 8)
- * via png_set_sig_bytes(), and we will only check the remaining bytes
- * here.  The application can then have access to the signature bytes we
- * read if it is determined that this isn't a valid PNG file.
- */
-void PNGAPI
-png_read_info(png_structrp png_ptr, png_inforp info_ptr)
-{
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   int keep;
-#endif
-
-   png_debug(1, "in png_read_info");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Read and check the PNG file signature. */
-   png_read_sig(png_ptr, info_ptr);
-
-   for (;;)
-   {
-      png_uint_32 length = png_read_chunk_header(png_ptr);
-      png_uint_32 chunk_name = png_ptr->chunk_name;
-
-      /* IDAT logic needs to happen here to simplify getting the two flags
-       * right.
-       */
-      if (chunk_name == png_IDAT)
-      {
-         if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-            png_chunk_error(png_ptr, "Missing IHDR before IDAT");
-
-         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-             (png_ptr->mode & PNG_HAVE_PLTE) == 0)
-            png_chunk_error(png_ptr, "Missing PLTE before IDAT");
-
-         else if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
-            png_chunk_benign_error(png_ptr, "Too many IDATs found");
-
-         png_ptr->mode |= PNG_HAVE_IDAT;
-      }
-
-      else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-      {
-         png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
-         png_ptr->mode |= PNG_AFTER_IDAT;
-      }
-
-      /* This should be a binary subdivision search or a hash for
-       * matching the chunk name rather than a linear search.
-       */
-      if (chunk_name == png_IHDR)
-         png_handle_IHDR(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IEND)
-         png_handle_IEND(png_ptr, info_ptr, length);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-      else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
-      {
-         png_handle_unknown(png_ptr, info_ptr, length, keep);
-
-         if (chunk_name == png_PLTE)
-            png_ptr->mode |= PNG_HAVE_PLTE;
-
-         else if (chunk_name == png_IDAT)
-         {
-            png_ptr->idat_size = 0; /* It has been consumed */
-            break;
-         }
-      }
-#endif
-      else if (chunk_name == png_PLTE)
-         png_handle_PLTE(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IDAT)
-      {
-         png_ptr->idat_size = length;
-         break;
-      }
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-      else if (chunk_name == png_bKGD)
-         png_handle_bKGD(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-      else if (chunk_name == png_cHRM)
-         png_handle_cHRM(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_eXIf_SUPPORTED
-      else if (chunk_name == png_eXIf)
-         png_handle_eXIf(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-      else if (chunk_name == png_gAMA)
-         png_handle_gAMA(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-      else if (chunk_name == png_hIST)
-         png_handle_hIST(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-      else if (chunk_name == png_oFFs)
-         png_handle_oFFs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-      else if (chunk_name == png_pCAL)
-         png_handle_pCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-      else if (chunk_name == png_sCAL)
-         png_handle_sCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-      else if (chunk_name == png_pHYs)
-         png_handle_pHYs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-      else if (chunk_name == png_sBIT)
-         png_handle_sBIT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-      else if (chunk_name == png_sRGB)
-         png_handle_sRGB(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-      else if (chunk_name == png_iCCP)
-         png_handle_iCCP(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-      else if (chunk_name == png_sPLT)
-         png_handle_sPLT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-      else if (chunk_name == png_tEXt)
-         png_handle_tEXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-      else if (chunk_name == png_tIME)
-         png_handle_tIME(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-      else if (chunk_name == png_tRNS)
-         png_handle_tRNS(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-      else if (chunk_name == png_zTXt)
-         png_handle_zTXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-      else if (chunk_name == png_iTXt)
-         png_handle_iTXt(png_ptr, info_ptr, length);
-#endif
-
-      else
-         png_handle_unknown(png_ptr, info_ptr, length,
-             PNG_HANDLE_CHUNK_AS_DEFAULT);
-   }
-}
-#endif /* SEQUENTIAL_READ */
-
-/* Optional call to update the users info_ptr structure */
-void PNGAPI
-png_read_update_info(png_structrp png_ptr, png_inforp info_ptr)
-{
-   png_debug(1, "in png_read_update_info");
-
-   if (png_ptr != NULL)
-   {
-      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
-      {
-         png_read_start_row(png_ptr);
-
-#        ifdef PNG_READ_TRANSFORMS_SUPPORTED
-            png_read_transform_info(png_ptr, info_ptr);
-#        else
-            PNG_UNUSED(info_ptr)
-#        endif
-      }
-
-      /* New in 1.6.0 this avoids the bug of doing the initializations twice */
-      else
-         png_app_error(png_ptr,
-             "png_read_update_info/png_start_read_image: duplicate call");
-   }
-}
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Initialize palette, background, etc, after transformations
- * are set, but before any reading takes place.  This allows
- * the user to obtain a gamma-corrected palette, for example.
- * If the user doesn't call this, we will do it ourselves.
- */
-void PNGAPI
-png_start_read_image(png_structrp png_ptr)
-{
-   png_debug(1, "in png_start_read_image");
-
-   if (png_ptr != NULL)
-   {
-      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
-         png_read_start_row(png_ptr);
-
-      /* New in 1.6.0 this avoids the bug of doing the initializations twice */
-      else
-         png_app_error(png_ptr,
-             "png_start_read_image/png_read_update_info: duplicate call");
-   }
-}
-#endif /* SEQUENTIAL_READ */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-/* Undoes intrapixel differencing,
- * NOTE: this is apparently only supported in the 'sequential' reader.
- */
-static void
-png_do_read_intrapixel(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_read_intrapixel");
-
-   if (
-       (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      int bytes_per_pixel;
-      png_uint_32 row_width = row_info->width;
-
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep rp;
-         png_uint_32 i;
-
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 3;
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 4;
-
-         else
-            return;
-
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            *(rp) = (png_byte)((256 + *rp + *(rp + 1)) & 0xff);
-            *(rp+2) = (png_byte)((256 + *(rp + 2) + *(rp + 1)) & 0xff);
-         }
-      }
-      else if (row_info->bit_depth == 16)
-      {
-         png_bytep rp;
-         png_uint_32 i;
-
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 6;
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 8;
-
-         else
-            return;
-
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            png_uint_32 s0   = (png_uint_32)(*(rp    ) << 8) | *(rp + 1);
-            png_uint_32 s1   = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3);
-            png_uint_32 s2   = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5);
-            png_uint_32 red  = (s0 + s1 + 65536) & 0xffff;
-            png_uint_32 blue = (s2 + s1 + 65536) & 0xffff;
-            *(rp    ) = (png_byte)((red >> 8) & 0xff);
-            *(rp + 1) = (png_byte)(red & 0xff);
-            *(rp + 4) = (png_byte)((blue >> 8) & 0xff);
-            *(rp + 5) = (png_byte)(blue & 0xff);
-         }
-      }
-   }
-}
-#endif /* MNG_FEATURES */
-
-void PNGAPI
-png_read_row(png_structrp png_ptr, png_bytep row, png_bytep dsp_row)
-{
-   png_row_info row_info;
-
-   if (png_ptr == NULL)
-      return;
-
-   png_debug2(1, "in png_read_row (row %lu, pass %d)",
-       (unsigned long)png_ptr->row_number, png_ptr->pass);
-
-   /* png_read_start_row sets the information (in particular iwidth) for this
-    * interlace pass.
-    */
-   if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
-      png_read_start_row(png_ptr);
-
-   /* 1.5.6: row_info moved out of png_struct to a local here. */
-   row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */
-   row_info.color_type = png_ptr->color_type;
-   row_info.bit_depth = png_ptr->bit_depth;
-   row_info.channels = png_ptr->channels;
-   row_info.pixel_depth = png_ptr->pixel_depth;
-   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
-
-#ifdef PNG_WARNINGS_SUPPORTED
-   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
-   {
-   /* Check for transforms that have been set but were defined out */
-#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
-   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
-      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
-   if ((png_ptr->transformations & PNG_FILLER) != 0)
-      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
-    !defined(PNG_READ_PACKSWAP_SUPPORTED)
-   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
-   if ((png_ptr->transformations & PNG_PACK) != 0)
-      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
-   if ((png_ptr->transformations & PNG_SHIFT) != 0)
-      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
-   if ((png_ptr->transformations & PNG_BGR) != 0)
-      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
-   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
-      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined");
-#endif
-   }
-#endif /* WARNINGS */
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* If interlaced and we do not need a new row, combine row and return.
-    * Notice that the pixels we have from previous rows have been transformed
-    * already; we can only combine like with like (transformed or
-    * untransformed) and, because of the libpng API for interlaced images, this
-    * means we must transform before de-interlacing.
-    */
-   if (png_ptr->interlaced != 0 &&
-       (png_ptr->transformations & PNG_INTERLACE) != 0)
-   {
-      switch (png_ptr->pass)
-      {
-         case 0:
-            if (png_ptr->row_number & 0x07)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 1:
-            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 2:
-            if ((png_ptr->row_number & 0x07) != 4)
-            {
-               if (dsp_row != NULL && (png_ptr->row_number & 4))
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 3:
-            if ((png_ptr->row_number & 3) || png_ptr->width < 3)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 4:
-            if ((png_ptr->row_number & 3) != 2)
-            {
-               if (dsp_row != NULL && (png_ptr->row_number & 2))
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 5:
-            if ((png_ptr->row_number & 1) || png_ptr->width < 2)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         default:
-         case 6:
-            if ((png_ptr->row_number & 1) == 0)
-            {
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-      }
-   }
-#endif
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) == 0)
-      png_error(png_ptr, "Invalid attempt to read row data");
-
-   /* Fill the row with IDAT data: */
-   png_ptr->row_buf[0]=255; /* to force error if no data was found */
-   png_read_IDAT_data(png_ptr, png_ptr->row_buf, row_info.rowbytes + 1);
-
-   if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
-   {
-      if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
-         png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
-             png_ptr->prev_row + 1, png_ptr->row_buf[0]);
-      else
-         png_error(png_ptr, "bad adaptive filter value");
-   }
-
-   /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before
-    * 1.5.6, while the buffer really is this big in current versions of libpng
-    * it may not be in the future, so this was changed just to copy the
-    * interlaced count:
-    */
-   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
-       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
-   {
-      /* Intrapixel differencing */
-      png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
-   }
-#endif
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   if (png_ptr->transformations
-#     ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-         || png_ptr->num_palette_max >= 0
-#     endif
-      )
-      png_do_read_transformations(png_ptr, &row_info);
-#endif
-
-   /* The transformed pixel depth should match the depth now in row_info. */
-   if (png_ptr->transformed_pixel_depth == 0)
-   {
-      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
-      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
-         png_error(png_ptr, "sequential row overflow");
-   }
-
-   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
-      png_error(png_ptr, "internal sequential row size calculation error");
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Expand interlaced rows to full size */
-   if (png_ptr->interlaced != 0 &&
-      (png_ptr->transformations & PNG_INTERLACE) != 0)
-   {
-      if (png_ptr->pass < 6)
-         png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass,
-             png_ptr->transformations);
-
-      if (dsp_row != NULL)
-         png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-      if (row != NULL)
-         png_combine_row(png_ptr, row, 0/*row*/);
-   }
-
-   else
-#endif
-   {
-      if (row != NULL)
-         png_combine_row(png_ptr, row, -1/*ignored*/);
-
-      if (dsp_row != NULL)
-         png_combine_row(png_ptr, dsp_row, -1/*ignored*/);
-   }
-   png_read_finish_row(png_ptr);
-
-   if (png_ptr->read_row_fn != NULL)
-      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
-
-}
-#endif /* SEQUENTIAL_READ */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read one or more rows of image data.  If the image is interlaced,
- * and png_set_interlace_handling() has been called, the rows need to
- * contain the contents of the rows from the previous pass.  If the
- * image has alpha or transparency, and png_handle_alpha()[*] has been
- * called, the rows contents must be initialized to the contents of the
- * screen.
- *
- * "row" holds the actual image, and pixels are placed in it
- * as they arrive.  If the image is displayed after each pass, it will
- * appear to "sparkle" in.  "display_row" can be used to display a
- * "chunky" progressive image, with finer detail added as it becomes
- * available.  If you do not want this "chunky" display, you may pass
- * NULL for display_row.  If you do not want the sparkle display, and
- * you have not called png_handle_alpha(), you may pass NULL for rows.
- * If you have called png_handle_alpha(), and the image has either an
- * alpha channel or a transparency chunk, you must provide a buffer for
- * rows.  In this case, you do not have to provide a display_row buffer
- * also, but you may.  If the image is not interlaced, or if you have
- * not called png_set_interlace_handling(), the display_row buffer will
- * be ignored, so pass NULL to it.
- *
- * [*] png_handle_alpha() does not exist yet, as of this version of libpng
- */
-
-void PNGAPI
-png_read_rows(png_structrp png_ptr, png_bytepp row,
-    png_bytepp display_row, png_uint_32 num_rows)
-{
-   png_uint_32 i;
-   png_bytepp rp;
-   png_bytepp dp;
-
-   png_debug(1, "in png_read_rows");
-
-   if (png_ptr == NULL)
-      return;
-
-   rp = row;
-   dp = display_row;
-   if (rp != NULL && dp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep rptr = *rp++;
-         png_bytep dptr = *dp++;
-
-         png_read_row(png_ptr, rptr, dptr);
-      }
-
-   else if (rp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep rptr = *rp;
-         png_read_row(png_ptr, rptr, NULL);
-         rp++;
-      }
-
-   else if (dp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep dptr = *dp;
-         png_read_row(png_ptr, NULL, dptr);
-         dp++;
-      }
-}
-#endif /* SEQUENTIAL_READ */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the entire image.  If the image has an alpha channel or a tRNS
- * chunk, and you have called png_handle_alpha()[*], you will need to
- * initialize the image to the current image that PNG will be overlaying.
- * We set the num_rows again here, in case it was incorrectly set in
- * png_read_start_row() by a call to png_read_update_info() or
- * png_start_read_image() if png_set_interlace_handling() wasn't called
- * prior to either of these functions like it should have been.  You can
- * only call this function once.  If you desire to have an image for
- * each pass of a interlaced image, use png_read_rows() instead.
- *
- * [*] png_handle_alpha() does not exist yet, as of this version of libpng
- */
-void PNGAPI
-png_read_image(png_structrp png_ptr, png_bytepp image)
-{
-   png_uint_32 i, image_height;
-   int pass, j;
-   png_bytepp rp;
-
-   png_debug(1, "in png_read_image");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
-   {
-      pass = png_set_interlace_handling(png_ptr);
-      /* And make sure transforms are initialized. */
-      png_start_read_image(png_ptr);
-   }
-   else
-   {
-      if (png_ptr->interlaced != 0 &&
-          (png_ptr->transformations & PNG_INTERLACE) == 0)
-      {
-         /* Caller called png_start_read_image or png_read_update_info without
-          * first turning on the PNG_INTERLACE transform.  We can fix this here,
-          * but the caller should do it!
-          */
-         png_warning(png_ptr, "Interlace handling should be turned on when "
-             "using png_read_image");
-         /* Make sure this is set correctly */
-         png_ptr->num_rows = png_ptr->height;
-      }
-
-      /* Obtain the pass number, which also turns on the PNG_INTERLACE flag in
-       * the above error case.
-       */
-      pass = png_set_interlace_handling(png_ptr);
-   }
-#else
-   if (png_ptr->interlaced)
-      png_error(png_ptr,
-          "Cannot read interlaced image -- interlace handler disabled");
-
-   pass = 1;
-#endif
-
-   image_height=png_ptr->height;
-
-   for (j = 0; j < pass; j++)
-   {
-      rp = image;
-      for (i = 0; i < image_height; i++)
-      {
-         png_read_row(png_ptr, *rp, NULL);
-         rp++;
-      }
-   }
-}
-#endif /* SEQUENTIAL_READ */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the end of the PNG file.  Will not read past the end of the
- * file, will verify the end is accurate, and will read any comments
- * or time information at the end of the file, if info is not NULL.
- */
-void PNGAPI
-png_read_end(png_structrp png_ptr, png_inforp info_ptr)
-{
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   int keep;
-#endif
-
-   png_debug(1, "in png_read_end");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* If png_read_end is called in the middle of reading the rows there may
-    * still be pending IDAT data and an owned zstream.  Deal with this here.
-    */
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   if (png_chunk_unknown_handling(png_ptr, png_IDAT) == 0)
-#endif
-      png_read_finish_IDAT(png_ptr);
-
-#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Report invalid palette index; added at libng-1.5.10 */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-       png_ptr->num_palette_max >= png_ptr->num_palette)
-      png_benign_error(png_ptr, "Read palette index exceeding num_palette");
-#endif
-
-   do
-   {
-      png_uint_32 length = png_read_chunk_header(png_ptr);
-      png_uint_32 chunk_name = png_ptr->chunk_name;
-
-      if (chunk_name != png_IDAT)
-         png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
-
-      if (chunk_name == png_IEND)
-         png_handle_IEND(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IHDR)
-         png_handle_IHDR(png_ptr, info_ptr, length);
-
-      else if (info_ptr == NULL)
-         png_crc_finish(png_ptr, length);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-      else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
-      {
-         if (chunk_name == png_IDAT)
-         {
-            if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED))
-                || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0)
-               png_benign_error(png_ptr, ".Too many IDATs found");
-         }
-         png_handle_unknown(png_ptr, info_ptr, length, keep);
-         if (chunk_name == png_PLTE)
-            png_ptr->mode |= PNG_HAVE_PLTE;
-      }
-#endif
-
-      else if (chunk_name == png_IDAT)
-      {
-         /* Zero length IDATs are legal after the last IDAT has been
-          * read, but not after other chunks have been read.  1.6 does not
-          * always read all the deflate data; specifically it cannot be relied
-          * upon to read the Adler32 at the end.  If it doesn't ignore IDAT
-          * chunks which are longer than zero as well:
-          */
-         if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED))
-             || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0)
-            png_benign_error(png_ptr, "..Too many IDATs found");
-
-         png_crc_finish(png_ptr, length);
-      }
-      else if (chunk_name == png_PLTE)
-         png_handle_PLTE(png_ptr, info_ptr, length);
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-      else if (chunk_name == png_bKGD)
-         png_handle_bKGD(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-      else if (chunk_name == png_cHRM)
-         png_handle_cHRM(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_eXIf_SUPPORTED
-      else if (chunk_name == png_eXIf)
-         png_handle_eXIf(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-      else if (chunk_name == png_gAMA)
-         png_handle_gAMA(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-      else if (chunk_name == png_hIST)
-         png_handle_hIST(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-      else if (chunk_name == png_oFFs)
-         png_handle_oFFs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-      else if (chunk_name == png_pCAL)
-         png_handle_pCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-      else if (chunk_name == png_sCAL)
-         png_handle_sCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-      else if (chunk_name == png_pHYs)
-         png_handle_pHYs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-      else if (chunk_name == png_sBIT)
-         png_handle_sBIT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-      else if (chunk_name == png_sRGB)
-         png_handle_sRGB(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-      else if (chunk_name == png_iCCP)
-         png_handle_iCCP(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-      else if (chunk_name == png_sPLT)
-         png_handle_sPLT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-      else if (chunk_name == png_tEXt)
-         png_handle_tEXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-      else if (chunk_name == png_tIME)
-         png_handle_tIME(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-      else if (chunk_name == png_tRNS)
-         png_handle_tRNS(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-      else if (chunk_name == png_zTXt)
-         png_handle_zTXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-      else if (chunk_name == png_iTXt)
-         png_handle_iTXt(png_ptr, info_ptr, length);
-#endif
-
-      else
-         png_handle_unknown(png_ptr, info_ptr, length,
-             PNG_HANDLE_CHUNK_AS_DEFAULT);
-   } while ((png_ptr->mode & PNG_HAVE_IEND) == 0);
-}
-#endif /* SEQUENTIAL_READ */
-
-/* Free all memory used in the read struct */
-static void
-png_read_destroy(png_structrp png_ptr)
-{
-   png_debug(1, "in png_read_destroy");
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   png_destroy_gamma_table(png_ptr);
-#endif
-
-   png_free(png_ptr, png_ptr->big_row_buf);
-   png_ptr->big_row_buf = NULL;
-   png_free(png_ptr, png_ptr->big_prev_row);
-   png_ptr->big_prev_row = NULL;
-   png_free(png_ptr, png_ptr->read_buffer);
-   png_ptr->read_buffer = NULL;
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   png_free(png_ptr, png_ptr->palette_lookup);
-   png_ptr->palette_lookup = NULL;
-   png_free(png_ptr, png_ptr->quantize_index);
-   png_ptr->quantize_index = NULL;
-#endif
-
-   if ((png_ptr->free_me & PNG_FREE_PLTE) != 0)
-   {
-      png_zfree(png_ptr, png_ptr->palette);
-      png_ptr->palette = NULL;
-   }
-   png_ptr->free_me &= ~PNG_FREE_PLTE;
-
-#if defined(PNG_tRNS_SUPPORTED) || \
-    defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   if ((png_ptr->free_me & PNG_FREE_TRNS) != 0)
-   {
-      png_free(png_ptr, png_ptr->trans_alpha);
-      png_ptr->trans_alpha = NULL;
-   }
-   png_ptr->free_me &= ~PNG_FREE_TRNS;
-#endif
-
-   inflateEnd(&png_ptr->zstream);
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   png_free(png_ptr, png_ptr->save_buffer);
-   png_ptr->save_buffer = NULL;
-#endif
-
-#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) && \
-   defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
-   png_free(png_ptr, png_ptr->unknown_chunk.data);
-   png_ptr->unknown_chunk.data = NULL;
-#endif
-
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-   png_free(png_ptr, png_ptr->chunk_list);
-   png_ptr->chunk_list = NULL;
-#endif
-
-#if defined(PNG_READ_EXPAND_SUPPORTED) && \
-    defined(PNG_ARM_NEON_IMPLEMENTATION)
-   png_free(png_ptr, png_ptr->riffled_palette);
-   png_ptr->riffled_palette = NULL;
-#endif
-
-   /* NOTE: the 'setjmp' buffer may still be allocated and the memory and error
-    * callbacks are still set at this point.  They are required to complete the
-    * destruction of the png_struct itself.
-    */
-}
-
-/* Free all memory used by the read */
-void PNGAPI
-png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr,
-    png_infopp end_info_ptr_ptr)
-{
-   png_structrp png_ptr = NULL;
-
-   png_debug(1, "in png_destroy_read_struct");
-
-   if (png_ptr_ptr != NULL)
-      png_ptr = *png_ptr_ptr;
-
-   if (png_ptr == NULL)
-      return;
-
-   /* libpng 1.6.0: use the API to destroy info structs to ensure consistent
-    * behavior.  Prior to 1.6.0 libpng did extra 'info' destruction in this API.
-    * The extra was, apparently, unnecessary yet this hides memory leak bugs.
-    */
-   png_destroy_info_struct(png_ptr, end_info_ptr_ptr);
-   png_destroy_info_struct(png_ptr, info_ptr_ptr);
-
-   *png_ptr_ptr = NULL;
-   png_read_destroy(png_ptr);
-   png_destroy_png_struct(png_ptr);
-}
-
-void PNGAPI
-png_set_read_status_fn(png_structrp png_ptr, png_read_status_ptr read_row_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->read_row_fn = read_row_fn;
-}
-
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_read_png(png_structrp png_ptr, png_inforp info_ptr,
-    int transforms, voidp params)
-{
-   png_debug(1, "in png_read_png");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* png_read_info() gives us all of the information from the
-    * PNG file before the first IDAT (image data chunk).
-    */
-   png_read_info(png_ptr, info_ptr);
-   if (info_ptr->height > PNG_UINT_32_MAX/(sizeof (png_bytep)))
-      png_error(png_ptr, "Image is too high to process with png_read_png()");
-
-   /* -------------- image transformations start here ------------------- */
-   /* libpng 1.6.10: add code to cause a png_app_error if a selected TRANSFORM
-    * is not implemented.  This will only happen in de-configured (non-default)
-    * libpng builds.  The results can be unexpected - png_read_png may return
-    * short or mal-formed rows because the transform is skipped.
-    */
-
-   /* Tell libpng to strip 16-bit/color files down to 8 bits per color.
-    */
-   if ((transforms & PNG_TRANSFORM_SCALE_16) != 0)
-      /* Added at libpng-1.5.4. "strip_16" produces the same result that it
-       * did in earlier versions, while "scale_16" is now more accurate.
-       */
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-      png_set_scale_16(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SCALE_16 not supported");
-#endif
-
-   /* If both SCALE and STRIP are required pngrtran will effectively cancel the
-    * latter by doing SCALE first.  This is ok and allows apps not to check for
-    * which is supported to get the right answer.
-    */
-   if ((transforms & PNG_TRANSFORM_STRIP_16) != 0)
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-      png_set_strip_16(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_16 not supported");
-#endif
-
-   /* Strip alpha bytes from the input data without combining with
-    * the background (not recommended).
-    */
-   if ((transforms & PNG_TRANSFORM_STRIP_ALPHA) != 0)
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-      png_set_strip_alpha(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_ALPHA not supported");
-#endif
-
-   /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single
-    * byte into separate bytes (useful for paletted and grayscale images).
-    */
-   if ((transforms & PNG_TRANSFORM_PACKING) != 0)
-#ifdef PNG_READ_PACK_SUPPORTED
-      png_set_packing(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported");
-#endif
-
-   /* Change the order of packed pixels to least significant bit first
-    * (not useful if you are using png_set_packing).
-    */
-   if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0)
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-      png_set_packswap(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported");
-#endif
-
-   /* Expand paletted colors into true RGB triplets
-    * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel
-    * Expand paletted or RGB images with transparency to full alpha
-    * channels so the data will be available as RGBA quartets.
-    */
-   if ((transforms & PNG_TRANSFORM_EXPAND) != 0)
-#ifdef PNG_READ_EXPAND_SUPPORTED
-      png_set_expand(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND not supported");
-#endif
-
-   /* We don't handle background color or gamma transformation or quantizing.
-    */
-
-   /* Invert monochrome files to have 0 as white and 1 as black
-    */
-   if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0)
-#ifdef PNG_READ_INVERT_SUPPORTED
-      png_set_invert_mono(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported");
-#endif
-
-   /* If you want to shift the pixel values from the range [0,255] or
-    * [0,65535] to the original [0,7] or [0,31], or whatever range the
-    * colors were originally in:
-    */
-   if ((transforms & PNG_TRANSFORM_SHIFT) != 0)
-#ifdef PNG_READ_SHIFT_SUPPORTED
-      if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
-         png_set_shift(png_ptr, &info_ptr->sig_bit);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported");
-#endif
-
-   /* Flip the RGB pixels to BGR (or RGBA to BGRA) */
-   if ((transforms & PNG_TRANSFORM_BGR) != 0)
-#ifdef PNG_READ_BGR_SUPPORTED
-      png_set_bgr(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported");
-#endif
-
-   /* Swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */
-   if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0)
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-      png_set_swap_alpha(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported");
-#endif
-
-   /* Swap bytes of 16-bit files to least significant byte first */
-   if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0)
-#ifdef PNG_READ_SWAP_SUPPORTED
-      png_set_swap(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported");
-#endif
-
-/* Added at libpng-1.2.41 */
-   /* Invert the alpha channel from opacity to transparency */
-   if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0)
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-      png_set_invert_alpha(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported");
-#endif
-
-/* Added at libpng-1.2.41 */
-   /* Expand grayscale image to RGB */
-   if ((transforms & PNG_TRANSFORM_GRAY_TO_RGB) != 0)
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-      png_set_gray_to_rgb(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_GRAY_TO_RGB not supported");
-#endif
-
-/* Added at libpng-1.5.4 */
-   if ((transforms & PNG_TRANSFORM_EXPAND_16) != 0)
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-      png_set_expand_16(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND_16 not supported");
-#endif
-
-   /* We don't handle adding filler bytes */
-
-   /* We use png_read_image and rely on that for interlace handling, but we also
-    * call png_read_update_info therefore must turn on interlace handling now:
-    */
-   (void)png_set_interlace_handling(png_ptr);
-
-   /* Optional call to gamma correct and add the background to the palette
-    * and update info structure.  REQUIRED if you are expecting libpng to
-    * update the palette for you (i.e., you selected such a transform above).
-    */
-   png_read_update_info(png_ptr, info_ptr);
-
-   /* -------------- image transformations end here ------------------- */
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
-   if (info_ptr->row_pointers == NULL)
-   {
-      png_uint_32 iptr;
-
-      info_ptr->row_pointers = png_voidcast(png_bytepp, png_malloc(png_ptr,
-          info_ptr->height * (sizeof (png_bytep))));
-
-      for (iptr=0; iptr<info_ptr->height; iptr++)
-         info_ptr->row_pointers[iptr] = NULL;
-
-      info_ptr->free_me |= PNG_FREE_ROWS;
-
-      for (iptr = 0; iptr < info_ptr->height; iptr++)
-         info_ptr->row_pointers[iptr] = png_voidcast(png_bytep,
-             png_malloc(png_ptr, info_ptr->rowbytes));
-   }
-
-   png_read_image(png_ptr, info_ptr->row_pointers);
-   info_ptr->valid |= PNG_INFO_IDAT;
-
-   /* Read rest of file, and get additional chunks in info_ptr - REQUIRED */
-   png_read_end(png_ptr, info_ptr);
-
-   PNG_UNUSED(params)
-}
-#endif /* INFO_IMAGE */
-#endif /* SEQUENTIAL_READ */
-
-#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
-/* SIMPLIFIED READ
- *
- * This code currently relies on the sequential reader, though it could easily
- * be made to work with the progressive one.
- */
-/* Arguments to png_image_finish_read: */
-
-/* Encoding of PNG data (used by the color-map code) */
-#  define P_NOTSET  0 /* File encoding not yet known */
-#  define P_sRGB    1 /* 8-bit encoded to sRGB gamma */
-#  define P_LINEAR  2 /* 16-bit linear: not encoded, NOT pre-multiplied! */
-#  define P_FILE    3 /* 8-bit encoded to file gamma, not sRGB or linear */
-#  define P_LINEAR8 4 /* 8-bit linear: only from a file value */
-
-/* Color-map processing: after libpng has run on the PNG image further
- * processing may be needed to convert the data to color-map indices.
- */
-#define PNG_CMAP_NONE      0
-#define PNG_CMAP_GA        1 /* Process GA data to a color-map with alpha */
-#define PNG_CMAP_TRANS     2 /* Process GA data to a background index */
-#define PNG_CMAP_RGB       3 /* Process RGB data */
-#define PNG_CMAP_RGB_ALPHA 4 /* Process RGBA data */
-
-/* The following document where the background is for each processing case. */
-#define PNG_CMAP_NONE_BACKGROUND      256
-#define PNG_CMAP_GA_BACKGROUND        231
-#define PNG_CMAP_TRANS_BACKGROUND     254
-#define PNG_CMAP_RGB_BACKGROUND       256
-#define PNG_CMAP_RGB_ALPHA_BACKGROUND 216
-
-typedef struct
-{
-   /* Arguments: */
-   png_imagep image;
-   png_voidp  buffer;
-   png_int_32 row_stride;
-   png_voidp  colormap;
-   png_const_colorp background;
-   /* Local variables: */
-   png_voidp       local_row;
-   png_voidp       first_row;
-   ptrdiff_t       row_bytes;           /* step between rows */
-   int             file_encoding;       /* E_ values above */
-   png_fixed_point gamma_to_linear;     /* For P_FILE, reciprocal of gamma */
-   int             colormap_processing; /* PNG_CMAP_ values above */
-} png_image_read_control;
-
-/* Do all the *safe* initialization - 'safe' means that png_error won't be
- * called, so setting up the jmp_buf is not required.  This means that anything
- * called from here must *not* call png_malloc - it has to call png_malloc_warn
- * instead so that control is returned safely back to this routine.
- */
-static int
-png_image_read_init(png_imagep image)
-{
-   if (image->opaque == NULL)
-   {
-      png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, image,
-          png_safe_error, png_safe_warning);
-
-      /* And set the rest of the structure to NULL to ensure that the various
-       * fields are consistent.
-       */
-      memset(image, 0, (sizeof *image));
-      image->version = PNG_IMAGE_VERSION;
-
-      if (png_ptr != NULL)
-      {
-         png_infop info_ptr = png_create_info_struct(png_ptr);
-
-         if (info_ptr != NULL)
-         {
-            png_controlp control = png_voidcast(png_controlp,
-                png_malloc_warn(png_ptr, (sizeof *control)));
-
-            if (control != NULL)
-            {
-               memset(control, 0, (sizeof *control));
-
-               control->png_ptr = png_ptr;
-               control->info_ptr = info_ptr;
-               control->for_write = 0;
-
-               image->opaque = control;
-               return 1;
-            }
-
-            /* Error clean up */
-            png_destroy_info_struct(png_ptr, &info_ptr);
-         }
-
-         png_destroy_read_struct(&png_ptr, NULL, NULL);
-      }
-
-      return png_image_error(image, "png_image_read: out of memory");
-   }
-
-   return png_image_error(image, "png_image_read: opaque pointer not NULL");
-}
-
-/* Utility to find the base format of a PNG file from a png_struct. */
-static png_uint_32
-png_image_format(png_structrp png_ptr)
-{
-   png_uint_32 format = 0;
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      format |= PNG_FORMAT_FLAG_COLOR;
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      format |= PNG_FORMAT_FLAG_ALPHA;
-
-   /* Use png_ptr here, not info_ptr, because by examination png_handle_tRNS
-    * sets the png_struct fields; that's all we are interested in here.  The
-    * precise interaction with an app call to png_set_tRNS and PNG file reading
-    * is unclear.
-    */
-   else if (png_ptr->num_trans > 0)
-      format |= PNG_FORMAT_FLAG_ALPHA;
-
-   if (png_ptr->bit_depth == 16)
-      format |= PNG_FORMAT_FLAG_LINEAR;
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_PALETTE) != 0)
-      format |= PNG_FORMAT_FLAG_COLORMAP;
-
-   return format;
-}
-
-/* Is the given gamma significantly different from sRGB?  The test is the same
- * one used in pngrtran.c when deciding whether to do gamma correction.  The
- * arithmetic optimizes the division by using the fact that the inverse of the
- * file sRGB gamma is 2.2
- */
-static int
-png_gamma_not_sRGB(png_fixed_point g)
-{
-   if (g < PNG_FP_1)
-   {
-      /* An uninitialized gamma is assumed to be sRGB for the simplified API. */
-      if (g == 0)
-         return 0;
-
-      return png_gamma_significant((g * 11 + 2)/5 /* i.e. *2.2, rounded */);
-   }
-
-   return 1;
-}
-
-/* Do the main body of a 'png_image_begin_read' function; read the PNG file
- * header and fill in all the information.  This is executed in a safe context,
- * unlike the init routine above.
- */
-static int
-png_image_read_header(png_voidp argument)
-{
-   png_imagep image = png_voidcast(png_imagep, argument);
-   png_structrp png_ptr = image->opaque->png_ptr;
-   png_inforp info_ptr = image->opaque->info_ptr;
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-   png_set_benign_errors(png_ptr, 1/*warn*/);
-#endif
-   png_read_info(png_ptr, info_ptr);
-
-   /* Do this the fast way; just read directly out of png_struct. */
-   image->width = png_ptr->width;
-   image->height = png_ptr->height;
-
-   {
-      png_uint_32 format = png_image_format(png_ptr);
-
-      image->format = format;
-
-#ifdef PNG_COLORSPACE_SUPPORTED
-      /* Does the colorspace match sRGB?  If there is no color endpoint
-       * (colorant) information assume yes, otherwise require the
-       * 'ENDPOINTS_MATCHP_sRGB' colorspace flag to have been set.  If the
-       * colorspace has been determined to be invalid ignore it.
-       */
-      if ((format & PNG_FORMAT_FLAG_COLOR) != 0 && ((png_ptr->colorspace.flags
-         & (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB|
-            PNG_COLORSPACE_INVALID)) == PNG_COLORSPACE_HAVE_ENDPOINTS))
-         image->flags |= PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB;
-#endif
-   }
-
-   /* We need the maximum number of entries regardless of the format the
-    * application sets here.
-    */
-   {
-      png_uint_32 cmap_entries;
-
-      switch (png_ptr->color_type)
-      {
-         case PNG_COLOR_TYPE_GRAY:
-            cmap_entries = 1U << png_ptr->bit_depth;
-            break;
-
-         case PNG_COLOR_TYPE_PALETTE:
-            cmap_entries = (png_uint_32)png_ptr->num_palette;
-            break;
-
-         default:
-            cmap_entries = 256;
-            break;
-      }
-
-      if (cmap_entries > 256)
-         cmap_entries = 256;
-
-      image->colormap_entries = cmap_entries;
-   }
-
-   return 1;
-}
-
-#ifdef PNG_STDIO_SUPPORTED
-int PNGAPI
-png_image_begin_read_from_stdio(png_imagep image, FILE* file)
-{
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (file != NULL)
-      {
-         if (png_image_read_init(image) != 0)
-         {
-            /* This is slightly evil, but png_init_io doesn't do anything other
-             * than this and we haven't changed the standard IO functions so
-             * this saves a 'safe' function.
-             */
-            image->opaque->png_ptr->io_ptr = file;
-            return png_safe_execute(image, png_image_read_header, image);
-         }
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_begin_read_from_stdio: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_begin_read_from_stdio: incorrect PNG_IMAGE_VERSION");
-
-   return 0;
-}
-
-int PNGAPI
-png_image_begin_read_from_file(png_imagep image, const char *file_name)
-{
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (file_name != NULL)
-      {
-         FILE *fp = fopen(file_name, "rb");
-
-         if (fp != NULL)
-         {
-            if (png_image_read_init(image) != 0)
-            {
-               image->opaque->png_ptr->io_ptr = fp;
-               image->opaque->owned_file = 1;
-               return png_safe_execute(image, png_image_read_header, image);
-            }
-
-            /* Clean up: just the opened file. */
-            (void)fclose(fp);
-         }
-
-         else
-            return png_image_error(image, strerror(errno));
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_begin_read_from_file: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_begin_read_from_file: incorrect PNG_IMAGE_VERSION");
-
-   return 0;
-}
-#endif /* STDIO */
-
-static void PNGCBAPI
-png_image_memory_read(png_structp png_ptr, png_bytep out, size_t need)
-{
-   if (png_ptr != NULL)
-   {
-      png_imagep image = png_voidcast(png_imagep, png_ptr->io_ptr);
-      if (image != NULL)
-      {
-         png_controlp cp = image->opaque;
-         if (cp != NULL)
-         {
-            png_const_bytep memory = cp->memory;
-            size_t size = cp->size;
-
-            if (memory != NULL && size >= need)
-            {
-               memcpy(out, memory, need);
-               cp->memory = memory + need;
-               cp->size = size - need;
-               return;
-            }
-
-            png_error(png_ptr, "read beyond end of data");
-         }
-      }
-
-      png_error(png_ptr, "invalid memory read");
-   }
-}
-
-int PNGAPI png_image_begin_read_from_memory(png_imagep image,
-    png_const_voidp memory, size_t size)
-{
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (memory != NULL && size > 0)
-      {
-         if (png_image_read_init(image) != 0)
-         {
-            /* Now set the IO functions to read from the memory buffer and
-             * store it into io_ptr.  Again do this in-place to avoid calling a
-             * libpng function that requires error handling.
-             */
-            image->opaque->memory = png_voidcast(png_const_bytep, memory);
-            image->opaque->size = size;
-            image->opaque->png_ptr->io_ptr = image;
-            image->opaque->png_ptr->read_data_fn = png_image_memory_read;
-
-            return png_safe_execute(image, png_image_read_header, image);
-         }
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_begin_read_from_memory: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_begin_read_from_memory: incorrect PNG_IMAGE_VERSION");
-
-   return 0;
-}
-
-/* Utility function to skip chunks that are not used by the simplified image
- * read functions and an appropriate macro to call it.
- */
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-static void
-png_image_skip_unused_chunks(png_structrp png_ptr)
-{
-   /* Prepare the reader to ignore all recognized chunks whose data will not
-    * be used, i.e., all chunks recognized by libpng except for those
-    * involved in basic image reading:
-    *
-    *    IHDR, PLTE, IDAT, IEND
-    *
-    * Or image data handling:
-    *
-    *    tRNS, bKGD, gAMA, cHRM, sRGB, [iCCP] and sBIT.
-    *
-    * This provides a small performance improvement and eliminates any
-    * potential vulnerability to security problems in the unused chunks.
-    *
-    * At present the iCCP chunk data isn't used, so iCCP chunk can be ignored
-    * too.  This allows the simplified API to be compiled without iCCP support,
-    * however if the support is there the chunk is still checked to detect
-    * errors (which are unfortunately quite common.)
-    */
-   {
-         static const png_byte chunks_to_process[] = {
-            98,  75,  71,  68, '\0',  /* bKGD */
-            99,  72,  82,  77, '\0',  /* cHRM */
-           103,  65,  77,  65, '\0',  /* gAMA */
-#        ifdef PNG_READ_iCCP_SUPPORTED
-           105,  67,  67,  80, '\0',  /* iCCP */
-#        endif
-           115,  66,  73,  84, '\0',  /* sBIT */
-           115,  82,  71,  66, '\0',  /* sRGB */
-           };
-
-       /* Ignore unknown chunks and all other chunks except for the
-        * IHDR, PLTE, tRNS, IDAT, and IEND chunks.
-        */
-       png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_NEVER,
-           NULL, -1);
-
-       /* But do not ignore image data handling chunks */
-       png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_AS_DEFAULT,
-           chunks_to_process, (int)/*SAFE*/(sizeof chunks_to_process)/5);
-   }
-}
-
-#  define PNG_SKIP_CHUNKS(p) png_image_skip_unused_chunks(p)
-#else
-#  define PNG_SKIP_CHUNKS(p) ((void)0)
-#endif /* HANDLE_AS_UNKNOWN */
-
-/* The following macro gives the exact rounded answer for all values in the
- * range 0..255 (it actually divides by 51.2, but the rounding still generates
- * the correct numbers 0..5
- */
-#define PNG_DIV51(v8) (((v8) * 5 + 130) >> 8)
-
-/* Utility functions to make particular color-maps */
-static void
-set_file_encoding(png_image_read_control *display)
-{
-   png_fixed_point g = display->image->opaque->png_ptr->colorspace.gamma;
-   if (png_gamma_significant(g) != 0)
-   {
-      if (png_gamma_not_sRGB(g) != 0)
-      {
-         display->file_encoding = P_FILE;
-         display->gamma_to_linear = png_reciprocal(g);
-      }
-
-      else
-         display->file_encoding = P_sRGB;
-   }
-
-   else
-      display->file_encoding = P_LINEAR8;
-}
-
-static unsigned int
-decode_gamma(png_image_read_control *display, png_uint_32 value, int encoding)
-{
-   if (encoding == P_FILE) /* double check */
-      encoding = display->file_encoding;
-
-   if (encoding == P_NOTSET) /* must be the file encoding */
-   {
-      set_file_encoding(display);
-      encoding = display->file_encoding;
-   }
-
-   switch (encoding)
-   {
-      case P_FILE:
-         value = png_gamma_16bit_correct(value*257, display->gamma_to_linear);
-         break;
-
-      case P_sRGB:
-         value = png_sRGB_table[value];
-         break;
-
-      case P_LINEAR:
-         break;
-
-      case P_LINEAR8:
-         value *= 257;
-         break;
-
-#ifdef __GNUC__
-      default:
-         png_error(display->image->opaque->png_ptr,
-             "unexpected encoding (internal error)");
-#endif
-   }
-
-   return value;
-}
-
-static png_uint_32
-png_colormap_compose(png_image_read_control *display,
-    png_uint_32 foreground, int foreground_encoding, png_uint_32 alpha,
-    png_uint_32 background, int encoding)
-{
-   /* The file value is composed on the background, the background has the given
-    * encoding and so does the result, the file is encoded with P_FILE and the
-    * file and alpha are 8-bit values.  The (output) encoding will always be
-    * P_LINEAR or P_sRGB.
-    */
-   png_uint_32 f = decode_gamma(display, foreground, foreground_encoding);
-   png_uint_32 b = decode_gamma(display, background, encoding);
-
-   /* The alpha is always an 8-bit value (it comes from the palette), the value
-    * scaled by 255 is what PNG_sRGB_FROM_LINEAR requires.
-    */
-   f = f * alpha + b * (255-alpha);
-
-   if (encoding == P_LINEAR)
-   {
-      /* Scale to 65535; divide by 255, approximately (in fact this is extremely
-       * accurate, it divides by 255.00000005937181414556, with no overflow.)
-       */
-      f *= 257; /* Now scaled by 65535 */
-      f += f >> 16;
-      f = (f+32768) >> 16;
-   }
-
-   else /* P_sRGB */
-      f = PNG_sRGB_FROM_LINEAR(f);
-
-   return f;
-}
-
-/* NOTE: P_LINEAR values to this routine must be 16-bit, but P_FILE values must
- * be 8-bit.
- */
-static void
-png_create_colormap_entry(png_image_read_control *display,
-    png_uint_32 ip, png_uint_32 red, png_uint_32 green, png_uint_32 blue,
-    png_uint_32 alpha, int encoding)
-{
-   png_imagep image = display->image;
-   int output_encoding = (image->format & PNG_FORMAT_FLAG_LINEAR) != 0 ?
-       P_LINEAR : P_sRGB;
-   int convert_to_Y = (image->format & PNG_FORMAT_FLAG_COLOR) == 0 &&
-       (red != green || green != blue);
-
-   if (ip > 255)
-      png_error(image->opaque->png_ptr, "color-map index out of range");
-
-   /* Update the cache with whether the file gamma is significantly different
-    * from sRGB.
-    */
-   if (encoding == P_FILE)
-   {
-      if (display->file_encoding == P_NOTSET)
-         set_file_encoding(display);
-
-      /* Note that the cached value may be P_FILE too, but if it is then the
-       * gamma_to_linear member has been set.
-       */
-      encoding = display->file_encoding;
-   }
-
-   if (encoding == P_FILE)
-   {
-      png_fixed_point g = display->gamma_to_linear;
-
-      red = png_gamma_16bit_correct(red*257, g);
-      green = png_gamma_16bit_correct(green*257, g);
-      blue = png_gamma_16bit_correct(blue*257, g);
-
-      if (convert_to_Y != 0 || output_encoding == P_LINEAR)
-      {
-         alpha *= 257;
-         encoding = P_LINEAR;
-      }
-
-      else
-      {
-         red = PNG_sRGB_FROM_LINEAR(red * 255);
-         green = PNG_sRGB_FROM_LINEAR(green * 255);
-         blue = PNG_sRGB_FROM_LINEAR(blue * 255);
-         encoding = P_sRGB;
-      }
-   }
-
-   else if (encoding == P_LINEAR8)
-   {
-      /* This encoding occurs quite frequently in test cases because PngSuite
-       * includes a gAMA 1.0 chunk with most images.
-       */
-      red *= 257;
-      green *= 257;
-      blue *= 257;
-      alpha *= 257;
-      encoding = P_LINEAR;
-   }
-
-   else if (encoding == P_sRGB &&
-       (convert_to_Y  != 0 || output_encoding == P_LINEAR))
-   {
-      /* The values are 8-bit sRGB values, but must be converted to 16-bit
-       * linear.
-       */
-      red = png_sRGB_table[red];
-      green = png_sRGB_table[green];
-      blue = png_sRGB_table[blue];
-      alpha *= 257;
-      encoding = P_LINEAR;
-   }
-
-   /* This is set if the color isn't gray but the output is. */
-   if (encoding == P_LINEAR)
-   {
-      if (convert_to_Y != 0)
-      {
-         /* NOTE: these values are copied from png_do_rgb_to_gray */
-         png_uint_32 y = (png_uint_32)6968 * red  + (png_uint_32)23434 * green +
-            (png_uint_32)2366 * blue;
-
-         if (output_encoding == P_LINEAR)
-            y = (y + 16384) >> 15;
-
-         else
-         {
-            /* y is scaled by 32768, we need it scaled by 255: */
-            y = (y + 128) >> 8;
-            y *= 255;
-            y = PNG_sRGB_FROM_LINEAR((y + 64) >> 7);
-            alpha = PNG_DIV257(alpha);
-            encoding = P_sRGB;
-         }
-
-         blue = red = green = y;
-      }
-
-      else if (output_encoding == P_sRGB)
-      {
-         red = PNG_sRGB_FROM_LINEAR(red * 255);
-         green = PNG_sRGB_FROM_LINEAR(green * 255);
-         blue = PNG_sRGB_FROM_LINEAR(blue * 255);
-         alpha = PNG_DIV257(alpha);
-         encoding = P_sRGB;
-      }
-   }
-
-   if (encoding != output_encoding)
-      png_error(image->opaque->png_ptr, "bad encoding (internal error)");
-
-   /* Store the value. */
-   {
-#     ifdef PNG_FORMAT_AFIRST_SUPPORTED
-         int afirst = (image->format & PNG_FORMAT_FLAG_AFIRST) != 0 &&
-            (image->format & PNG_FORMAT_FLAG_ALPHA) != 0;
-#     else
-#        define afirst 0
-#     endif
-#     ifdef PNG_FORMAT_BGR_SUPPORTED
-         int bgr = (image->format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0;
-#     else
-#        define bgr 0
-#     endif
-
-      if (output_encoding == P_LINEAR)
-      {
-         png_uint_16p entry = png_voidcast(png_uint_16p, display->colormap);
-
-         entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format);
-
-         /* The linear 16-bit values must be pre-multiplied by the alpha channel
-          * value, if less than 65535 (this is, effectively, composite on black
-          * if the alpha channel is removed.)
-          */
-         switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format))
-         {
-            case 4:
-               entry[afirst ? 0 : 3] = (png_uint_16)alpha;
-               /* FALLTHROUGH */
-
-            case 3:
-               if (alpha < 65535)
-               {
-                  if (alpha > 0)
-                  {
-                     blue = (blue * alpha + 32767U)/65535U;
-                     green = (green * alpha + 32767U)/65535U;
-                     red = (red * alpha + 32767U)/65535U;
-                  }
-
-                  else
-                     red = green = blue = 0;
-               }
-               entry[afirst + (2 ^ bgr)] = (png_uint_16)blue;
-               entry[afirst + 1] = (png_uint_16)green;
-               entry[afirst + bgr] = (png_uint_16)red;
-               break;
-
-            case 2:
-               entry[1 ^ afirst] = (png_uint_16)alpha;
-               /* FALLTHROUGH */
-
-            case 1:
-               if (alpha < 65535)
-               {
-                  if (alpha > 0)
-                     green = (green * alpha + 32767U)/65535U;
-
-                  else
-                     green = 0;
-               }
-               entry[afirst] = (png_uint_16)green;
-               break;
-
-            default:
-               break;
-         }
-      }
-
-      else /* output encoding is P_sRGB */
-      {
-         png_bytep entry = png_voidcast(png_bytep, display->colormap);
-
-         entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format);
-
-         switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format))
-         {
-            case 4:
-               entry[afirst ? 0 : 3] = (png_byte)alpha;
-               /* FALLTHROUGH */
-            case 3:
-               entry[afirst + (2 ^ bgr)] = (png_byte)blue;
-               entry[afirst + 1] = (png_byte)green;
-               entry[afirst + bgr] = (png_byte)red;
-               break;
-
-            case 2:
-               entry[1 ^ afirst] = (png_byte)alpha;
-               /* FALLTHROUGH */
-            case 1:
-               entry[afirst] = (png_byte)green;
-               break;
-
-            default:
-               break;
-         }
-      }
-
-#     ifdef afirst
-#        undef afirst
-#     endif
-#     ifdef bgr
-#        undef bgr
-#     endif
-   }
-}
-
-static int
-make_gray_file_colormap(png_image_read_control *display)
-{
-   unsigned int i;
-
-   for (i=0; i<256; ++i)
-      png_create_colormap_entry(display, i, i, i, i, 255, P_FILE);
-
-   return (int)i;
-}
-
-static int
-make_gray_colormap(png_image_read_control *display)
-{
-   unsigned int i;
-
-   for (i=0; i<256; ++i)
-      png_create_colormap_entry(display, i, i, i, i, 255, P_sRGB);
-
-   return (int)i;
-}
-#define PNG_GRAY_COLORMAP_ENTRIES 256
-
-static int
-make_ga_colormap(png_image_read_control *display)
-{
-   unsigned int i, a;
-
-   /* Alpha is retained, the output will be a color-map with entries
-    * selected by six levels of alpha.  One transparent entry, 6 gray
-    * levels for all the intermediate alpha values, leaving 230 entries
-    * for the opaque grays.  The color-map entries are the six values
-    * [0..5]*51, the GA processing uses PNG_DIV51(value) to find the
-    * relevant entry.
-    *
-    * if (alpha > 229) // opaque
-    * {
-    *    // The 231 entries are selected to make the math below work:
-    *    base = 0;
-    *    entry = (231 * gray + 128) >> 8;
-    * }
-    * else if (alpha < 26) // transparent
-    * {
-    *    base = 231;
-    *    entry = 0;
-    * }
-    * else // partially opaque
-    * {
-    *    base = 226 + 6 * PNG_DIV51(alpha);
-    *    entry = PNG_DIV51(gray);
-    * }
-    */
-   i = 0;
-   while (i < 231)
-   {
-      unsigned int gray = (i * 256 + 115) / 231;
-      png_create_colormap_entry(display, i++, gray, gray, gray, 255, P_sRGB);
-   }
-
-   /* 255 is used here for the component values for consistency with the code
-    * that undoes premultiplication in pngwrite.c.
-    */
-   png_create_colormap_entry(display, i++, 255, 255, 255, 0, P_sRGB);
-
-   for (a=1; a<5; ++a)
-   {
-      unsigned int g;
-
-      for (g=0; g<6; ++g)
-         png_create_colormap_entry(display, i++, g*51, g*51, g*51, a*51,
-             P_sRGB);
-   }
-
-   return (int)i;
-}
-
-#define PNG_GA_COLORMAP_ENTRIES 256
-
-static int
-make_rgb_colormap(png_image_read_control *display)
-{
-   unsigned int i, r;
-
-   /* Build a 6x6x6 opaque RGB cube */
-   for (i=r=0; r<6; ++r)
-   {
-      unsigned int g;
-
-      for (g=0; g<6; ++g)
-      {
-         unsigned int b;
-
-         for (b=0; b<6; ++b)
-            png_create_colormap_entry(display, i++, r*51, g*51, b*51, 255,
-                P_sRGB);
-      }
-   }
-
-   return (int)i;
-}
-
-#define PNG_RGB_COLORMAP_ENTRIES 216
-
-/* Return a palette index to the above palette given three 8-bit sRGB values. */
-#define PNG_RGB_INDEX(r,g,b) \
-   ((png_byte)(6 * (6 * PNG_DIV51(r) + PNG_DIV51(g)) + PNG_DIV51(b)))
-
-static int
-png_image_read_colormap(png_voidp argument)
-{
-   png_image_read_control *display =
-      png_voidcast(png_image_read_control*, argument);
-   png_imagep image = display->image;
-
-   png_structrp png_ptr = image->opaque->png_ptr;
-   png_uint_32 output_format = image->format;
-   int output_encoding = (output_format & PNG_FORMAT_FLAG_LINEAR) != 0 ?
-      P_LINEAR : P_sRGB;
-
-   unsigned int cmap_entries;
-   unsigned int output_processing;        /* Output processing option */
-   unsigned int data_encoding = P_NOTSET; /* Encoding libpng must produce */
-
-   /* Background information; the background color and the index of this color
-    * in the color-map if it exists (else 256).
-    */
-   unsigned int background_index = 256;
-   png_uint_32 back_r, back_g, back_b;
-
-   /* Flags to accumulate things that need to be done to the input. */
-   int expand_tRNS = 0;
-
-   /* Exclude the NYI feature of compositing onto a color-mapped buffer; it is
-    * very difficult to do, the results look awful, and it is difficult to see
-    * what possible use it is because the application can't control the
-    * color-map.
-    */
-   if (((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0 ||
-         png_ptr->num_trans > 0) /* alpha in input */ &&
-      ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0) /* no alpha in output */)
-   {
-      if (output_encoding == P_LINEAR) /* compose on black */
-         back_b = back_g = back_r = 0;
-
-      else if (display->background == NULL /* no way to remove it */)
-         png_error(png_ptr,
-             "background color must be supplied to remove alpha/transparency");
-
-      /* Get a copy of the background color (this avoids repeating the checks
-       * below.)  The encoding is 8-bit sRGB or 16-bit linear, depending on the
-       * output format.
-       */
-      else
-      {
-         back_g = display->background->green;
-         if ((output_format & PNG_FORMAT_FLAG_COLOR) != 0)
-         {
-            back_r = display->background->red;
-            back_b = display->background->blue;
-         }
-         else
-            back_b = back_r = back_g;
-      }
-   }
-
-   else if (output_encoding == P_LINEAR)
-      back_b = back_r = back_g = 65535;
-
-   else
-      back_b = back_r = back_g = 255;
-
-   /* Default the input file gamma if required - this is necessary because
-    * libpng assumes that if no gamma information is present the data is in the
-    * output format, but the simplified API deduces the gamma from the input
-    * format.
-    */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) == 0)
-   {
-      /* Do this directly, not using the png_colorspace functions, to ensure
-       * that it happens even if the colorspace is invalid (though probably if
-       * it is the setting will be ignored)  Note that the same thing can be
-       * achieved at the application interface with png_set_gAMA.
-       */
-      if (png_ptr->bit_depth == 16 &&
-         (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0)
-         png_ptr->colorspace.gamma = PNG_GAMMA_LINEAR;
-
-      else
-         png_ptr->colorspace.gamma = PNG_GAMMA_sRGB_INVERSE;
-
-      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
-   }
-
-   /* Decide what to do based on the PNG color type of the input data.  The
-    * utility function png_create_colormap_entry deals with most aspects of the
-    * output transformations; this code works out how to produce bytes of
-    * color-map entries from the original format.
-    */
-   switch (png_ptr->color_type)
-   {
-      case PNG_COLOR_TYPE_GRAY:
-         if (png_ptr->bit_depth <= 8)
-         {
-            /* There at most 256 colors in the output, regardless of
-             * transparency.
-             */
-            unsigned int step, i, val, trans = 256/*ignore*/, back_alpha = 0;
-
-            cmap_entries = 1U << png_ptr->bit_depth;
-            if (cmap_entries > image->colormap_entries)
-               png_error(png_ptr, "gray[8] color-map: too few entries");
-
-            step = 255 / (cmap_entries - 1);
-            output_processing = PNG_CMAP_NONE;
-
-            /* If there is a tRNS chunk then this either selects a transparent
-             * value or, if the output has no alpha, the background color.
-             */
-            if (png_ptr->num_trans > 0)
-            {
-               trans = png_ptr->trans_color.gray;
-
-               if ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0)
-                  back_alpha = output_encoding == P_LINEAR ? 65535 : 255;
-            }
-
-            /* png_create_colormap_entry just takes an RGBA and writes the
-             * corresponding color-map entry using the format from 'image',
-             * including the required conversion to sRGB or linear as
-             * appropriate.  The input values are always either sRGB (if the
-             * gamma correction flag is 0) or 0..255 scaled file encoded values
-             * (if the function must gamma correct them).
-             */
-            for (i=val=0; i<cmap_entries; ++i, val += step)
-            {
-               /* 'i' is a file value.  While this will result in duplicated
-                * entries for 8-bit non-sRGB encoded files it is necessary to
-                * have non-gamma corrected values to do tRNS handling.
-                */
-               if (i != trans)
-                  png_create_colormap_entry(display, i, val, val, val, 255,
-                      P_FILE/*8-bit with file gamma*/);
-
-               /* Else this entry is transparent.  The colors don't matter if
-                * there is an alpha channel (back_alpha == 0), but it does no
-                * harm to pass them in; the values are not set above so this
-                * passes in white.
-                *
-                * NOTE: this preserves the full precision of the application
-                * supplied background color when it is used.
-                */
-               else
-                  png_create_colormap_entry(display, i, back_r, back_g, back_b,
-                      back_alpha, output_encoding);
-            }
-
-            /* We need libpng to preserve the original encoding. */
-            data_encoding = P_FILE;
-
-            /* The rows from libpng, while technically gray values, are now also
-             * color-map indices; however, they may need to be expanded to 1
-             * byte per pixel.  This is what png_set_packing does (i.e., it
-             * unpacks the bit values into bytes.)
-             */
-            if (png_ptr->bit_depth < 8)
-               png_set_packing(png_ptr);
-         }
-
-         else /* bit depth is 16 */
-         {
-            /* The 16-bit input values can be converted directly to 8-bit gamma
-             * encoded values; however, if a tRNS chunk is present 257 color-map
-             * entries are required.  This means that the extra entry requires
-             * special processing; add an alpha channel, sacrifice gray level
-             * 254 and convert transparent (alpha==0) entries to that.
-             *
-             * Use libpng to chop the data to 8 bits.  Convert it to sRGB at the
-             * same time to minimize quality loss.  If a tRNS chunk is present
-             * this means libpng must handle it too; otherwise it is impossible
-             * to do the exact match on the 16-bit value.
-             *
-             * If the output has no alpha channel *and* the background color is
-             * gray then it is possible to let libpng handle the substitution by
-             * ensuring that the corresponding gray level matches the background
-             * color exactly.
-             */
-            data_encoding = P_sRGB;
-
-            if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
-               png_error(png_ptr, "gray[16] color-map: too few entries");
-
-            cmap_entries = (unsigned int)make_gray_colormap(display);
-
-            if (png_ptr->num_trans > 0)
-            {
-               unsigned int back_alpha;
-
-               if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-                  back_alpha = 0;
-
-               else
-               {
-                  if (back_r == back_g && back_g == back_b)
-                  {
-                     /* Background is gray; no special processing will be
-                      * required.
-                      */
-                     png_color_16 c;
-                     png_uint_32 gray = back_g;
-
-                     if (output_encoding == P_LINEAR)
-                     {
-                        gray = PNG_sRGB_FROM_LINEAR(gray * 255);
-
-                        /* And make sure the corresponding palette entry
-                         * matches.
-                         */
-                        png_create_colormap_entry(display, gray, back_g, back_g,
-                            back_g, 65535, P_LINEAR);
-                     }
-
-                     /* The background passed to libpng, however, must be the
-                      * sRGB value.
-                      */
-                     c.index = 0; /*unused*/
-                     c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
-
-                     /* NOTE: does this work without expanding tRNS to alpha?
-                      * It should be the color->gray case below apparently
-                      * doesn't.
-                      */
-                     png_set_background_fixed(png_ptr, &c,
-                         PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
-                         0/*gamma: not used*/);
-
-                     output_processing = PNG_CMAP_NONE;
-                     break;
-                  }
-#ifdef __COVERITY__
-                 /* Coverity claims that output_encoding cannot be 2 (P_LINEAR)
-                  * here.
-                  */
-                  back_alpha = 255;
-#else
-                  back_alpha = output_encoding == P_LINEAR ? 65535 : 255;
-#endif
-               }
-
-               /* output_processing means that the libpng-processed row will be
-                * 8-bit GA and it has to be processing to single byte color-map
-                * values.  Entry 254 is replaced by either a completely
-                * transparent entry or by the background color at full
-                * precision (and the background color is not a simple gray
-                * level in this case.)
-                */
-               expand_tRNS = 1;
-               output_processing = PNG_CMAP_TRANS;
-               background_index = 254;
-
-               /* And set (overwrite) color-map entry 254 to the actual
-                * background color at full precision.
-                */
-               png_create_colormap_entry(display, 254, back_r, back_g, back_b,
-                   back_alpha, output_encoding);
-            }
-
-            else
-               output_processing = PNG_CMAP_NONE;
-         }
-         break;
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-         /* 8-bit or 16-bit PNG with two channels - gray and alpha.  A minimum
-          * of 65536 combinations.  If, however, the alpha channel is to be
-          * removed there are only 256 possibilities if the background is gray.
-          * (Otherwise there is a subset of the 65536 possibilities defined by
-          * the triangle between black, white and the background color.)
-          *
-          * Reduce 16-bit files to 8-bit and sRGB encode the result.  No need to
-          * worry about tRNS matching - tRNS is ignored if there is an alpha
-          * channel.
-          */
-         data_encoding = P_sRGB;
-
-         if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-         {
-            if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
-               png_error(png_ptr, "gray+alpha color-map: too few entries");
-
-            cmap_entries = (unsigned int)make_ga_colormap(display);
-
-            background_index = PNG_CMAP_GA_BACKGROUND;
-            output_processing = PNG_CMAP_GA;
-         }
-
-         else /* alpha is removed */
-         {
-            /* Alpha must be removed as the PNG data is processed when the
-             * background is a color because the G and A channels are
-             * independent and the vector addition (non-parallel vectors) is a
-             * 2-D problem.
-             *
-             * This can be reduced to the same algorithm as above by making a
-             * colormap containing gray levels (for the opaque grays), a
-             * background entry (for a transparent pixel) and a set of four six
-             * level color values, one set for each intermediate alpha value.
-             * See the comments in make_ga_colormap for how this works in the
-             * per-pixel processing.
-             *
-             * If the background is gray, however, we only need a 256 entry gray
-             * level color map.  It is sufficient to make the entry generated
-             * for the background color be exactly the color specified.
-             */
-            if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0 ||
-               (back_r == back_g && back_g == back_b))
-            {
-               /* Background is gray; no special processing will be required. */
-               png_color_16 c;
-               png_uint_32 gray = back_g;
-
-               if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
-                  png_error(png_ptr, "gray-alpha color-map: too few entries");
-
-               cmap_entries = (unsigned int)make_gray_colormap(display);
-
-               if (output_encoding == P_LINEAR)
-               {
-                  gray = PNG_sRGB_FROM_LINEAR(gray * 255);
-
-                  /* And make sure the corresponding palette entry matches. */
-                  png_create_colormap_entry(display, gray, back_g, back_g,
-                      back_g, 65535, P_LINEAR);
-               }
-
-               /* The background passed to libpng, however, must be the sRGB
-                * value.
-                */
-               c.index = 0; /*unused*/
-               c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
-
-               png_set_background_fixed(png_ptr, &c,
-                   PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
-                   0/*gamma: not used*/);
-
-               output_processing = PNG_CMAP_NONE;
-            }
-
-            else
-            {
-               png_uint_32 i, a;
-
-               /* This is the same as png_make_ga_colormap, above, except that
-                * the entries are all opaque.
-                */
-               if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
-                  png_error(png_ptr, "ga-alpha color-map: too few entries");
-
-               i = 0;
-               while (i < 231)
-               {
-                  png_uint_32 gray = (i * 256 + 115) / 231;
-                  png_create_colormap_entry(display, i++, gray, gray, gray,
-                      255, P_sRGB);
-               }
-
-               /* NOTE: this preserves the full precision of the application
-                * background color.
-                */
-               background_index = i;
-               png_create_colormap_entry(display, i++, back_r, back_g, back_b,
-#ifdef __COVERITY__
-                   /* Coverity claims that output_encoding
-                    * cannot be 2 (P_LINEAR) here.
-                    */ 255U,
-#else
-                    output_encoding == P_LINEAR ? 65535U : 255U,
-#endif
-                    output_encoding);
-
-               /* For non-opaque input composite on the sRGB background - this
-                * requires inverting the encoding for each component.  The input
-                * is still converted to the sRGB encoding because this is a
-                * reasonable approximate to the logarithmic curve of human
-                * visual sensitivity, at least over the narrow range which PNG
-                * represents.  Consequently 'G' is always sRGB encoded, while
-                * 'A' is linear.  We need the linear background colors.
-                */
-               if (output_encoding == P_sRGB) /* else already linear */
-               {
-                  /* This may produce a value not exactly matching the
-                   * background, but that's ok because these numbers are only
-                   * used when alpha != 0
-                   */
-                  back_r = png_sRGB_table[back_r];
-                  back_g = png_sRGB_table[back_g];
-                  back_b = png_sRGB_table[back_b];
-               }
-
-               for (a=1; a<5; ++a)
-               {
-                  unsigned int g;
-
-                  /* PNG_sRGB_FROM_LINEAR expects a 16-bit linear value scaled
-                   * by an 8-bit alpha value (0..255).
-                   */
-                  png_uint_32 alpha = 51 * a;
-                  png_uint_32 back_rx = (255-alpha) * back_r;
-                  png_uint_32 back_gx = (255-alpha) * back_g;
-                  png_uint_32 back_bx = (255-alpha) * back_b;
-
-                  for (g=0; g<6; ++g)
-                  {
-                     png_uint_32 gray = png_sRGB_table[g*51] * alpha;
-
-                     png_create_colormap_entry(display, i++,
-                         PNG_sRGB_FROM_LINEAR(gray + back_rx),
-                         PNG_sRGB_FROM_LINEAR(gray + back_gx),
-                         PNG_sRGB_FROM_LINEAR(gray + back_bx), 255, P_sRGB);
-                  }
-               }
-
-               cmap_entries = i;
-               output_processing = PNG_CMAP_GA;
-            }
-         }
-         break;
-
-      case PNG_COLOR_TYPE_RGB:
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-         /* Exclude the case where the output is gray; we can always handle this
-          * with the cases above.
-          */
-         if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0)
-         {
-            /* The color-map will be grayscale, so we may as well convert the
-             * input RGB values to a simple grayscale and use the grayscale
-             * code above.
-             *
-             * NOTE: calling this apparently damages the recognition of the
-             * transparent color in background color handling; call
-             * png_set_tRNS_to_alpha before png_set_background_fixed.
-             */
-            png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE, -1,
-                -1);
-            data_encoding = P_sRGB;
-
-            /* The output will now be one or two 8-bit gray or gray+alpha
-             * channels.  The more complex case arises when the input has alpha.
-             */
-            if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-               png_ptr->num_trans > 0) &&
-               (output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-            {
-               /* Both input and output have an alpha channel, so no background
-                * processing is required; just map the GA bytes to the right
-                * color-map entry.
-                */
-               expand_tRNS = 1;
-
-               if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
-                  png_error(png_ptr, "rgb[ga] color-map: too few entries");
-
-               cmap_entries = (unsigned int)make_ga_colormap(display);
-               background_index = PNG_CMAP_GA_BACKGROUND;
-               output_processing = PNG_CMAP_GA;
-            }
-
-            else
-            {
-               /* Either the input or the output has no alpha channel, so there
-                * will be no non-opaque pixels in the color-map; it will just be
-                * grayscale.
-                */
-               if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
-                  png_error(png_ptr, "rgb[gray] color-map: too few entries");
-
-               /* Ideally this code would use libpng to do the gamma correction,
-                * but if an input alpha channel is to be removed we will hit the
-                * libpng bug in gamma+compose+rgb-to-gray (the double gamma
-                * correction bug).  Fix this by dropping the gamma correction in
-                * this case and doing it in the palette; this will result in
-                * duplicate palette entries, but that's better than the
-                * alternative of double gamma correction.
-                */
-               if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-                  png_ptr->num_trans > 0) &&
-                  png_gamma_not_sRGB(png_ptr->colorspace.gamma) != 0)
-               {
-                  cmap_entries = (unsigned int)make_gray_file_colormap(display);
-                  data_encoding = P_FILE;
-               }
-
-               else
-                  cmap_entries = (unsigned int)make_gray_colormap(display);
-
-               /* But if the input has alpha or transparency it must be removed
-                */
-               if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-                  png_ptr->num_trans > 0)
-               {
-                  png_color_16 c;
-                  png_uint_32 gray = back_g;
-
-                  /* We need to ensure that the application background exists in
-                   * the colormap and that completely transparent pixels map to
-                   * it.  Achieve this simply by ensuring that the entry
-                   * selected for the background really is the background color.
-                   */
-                  if (data_encoding == P_FILE) /* from the fixup above */
-                  {
-                     /* The app supplied a gray which is in output_encoding, we
-                      * need to convert it to a value of the input (P_FILE)
-                      * encoding then set this palette entry to the required
-                      * output encoding.
-                      */
-                     if (output_encoding == P_sRGB)
-                        gray = png_sRGB_table[gray]; /* now P_LINEAR */
-
-                     gray = PNG_DIV257(png_gamma_16bit_correct(gray,
-                         png_ptr->colorspace.gamma)); /* now P_FILE */
-
-                     /* And make sure the corresponding palette entry contains
-                      * exactly the required sRGB value.
-                      */
-                     png_create_colormap_entry(display, gray, back_g, back_g,
-                         back_g, 0/*unused*/, output_encoding);
-                  }
-
-                  else if (output_encoding == P_LINEAR)
-                  {
-                     gray = PNG_sRGB_FROM_LINEAR(gray * 255);
-
-                     /* And make sure the corresponding palette entry matches.
-                      */
-                     png_create_colormap_entry(display, gray, back_g, back_g,
-                        back_g, 0/*unused*/, P_LINEAR);
-                  }
-
-                  /* The background passed to libpng, however, must be the
-                   * output (normally sRGB) value.
-                   */
-                  c.index = 0; /*unused*/
-                  c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
-
-                  /* NOTE: the following is apparently a bug in libpng. Without
-                   * it the transparent color recognition in
-                   * png_set_background_fixed seems to go wrong.
-                   */
-                  expand_tRNS = 1;
-                  png_set_background_fixed(png_ptr, &c,
-                      PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
-                      0/*gamma: not used*/);
-               }
-
-               output_processing = PNG_CMAP_NONE;
-            }
-         }
-
-         else /* output is color */
-         {
-            /* We could use png_quantize here so long as there is no transparent
-             * color or alpha; png_quantize ignores alpha.  Easier overall just
-             * to do it once and using PNG_DIV51 on the 6x6x6 reduced RGB cube.
-             * Consequently we always want libpng to produce sRGB data.
-             */
-            data_encoding = P_sRGB;
-
-            /* Is there any transparency or alpha? */
-            if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-               png_ptr->num_trans > 0)
-            {
-               /* Is there alpha in the output too?  If so all four channels are
-                * processed into a special RGB cube with alpha support.
-                */
-               if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-               {
-                  png_uint_32 r;
-
-                  if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries)
-                     png_error(png_ptr, "rgb+alpha color-map: too few entries");
-
-                  cmap_entries = (unsigned int)make_rgb_colormap(display);
-
-                  /* Add a transparent entry. */
-                  png_create_colormap_entry(display, cmap_entries, 255, 255,
-                      255, 0, P_sRGB);
-
-                  /* This is stored as the background index for the processing
-                   * algorithm.
-                   */
-                  background_index = cmap_entries++;
-
-                  /* Add 27 r,g,b entries each with alpha 0.5. */
-                  for (r=0; r<256; r = (r << 1) | 0x7f)
-                  {
-                     png_uint_32 g;
-
-                     for (g=0; g<256; g = (g << 1) | 0x7f)
-                     {
-                        png_uint_32 b;
-
-                        /* This generates components with the values 0, 127 and
-                         * 255
-                         */
-                        for (b=0; b<256; b = (b << 1) | 0x7f)
-                           png_create_colormap_entry(display, cmap_entries++,
-                               r, g, b, 128, P_sRGB);
-                     }
-                  }
-
-                  expand_tRNS = 1;
-                  output_processing = PNG_CMAP_RGB_ALPHA;
-               }
-
-               else
-               {
-                  /* Alpha/transparency must be removed.  The background must
-                   * exist in the color map (achieved by setting adding it after
-                   * the 666 color-map).  If the standard processing code will
-                   * pick up this entry automatically that's all that is
-                   * required; libpng can be called to do the background
-                   * processing.
-                   */
-                  unsigned int sample_size =
-                     PNG_IMAGE_SAMPLE_SIZE(output_format);
-                  png_uint_32 r, g, b; /* sRGB background */
-
-                  if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries)
-                     png_error(png_ptr, "rgb-alpha color-map: too few entries");
-
-                  cmap_entries = (unsigned int)make_rgb_colormap(display);
-
-                  png_create_colormap_entry(display, cmap_entries, back_r,
-                      back_g, back_b, 0/*unused*/, output_encoding);
-
-                  if (output_encoding == P_LINEAR)
-                  {
-                     r = PNG_sRGB_FROM_LINEAR(back_r * 255);
-                     g = PNG_sRGB_FROM_LINEAR(back_g * 255);
-                     b = PNG_sRGB_FROM_LINEAR(back_b * 255);
-                  }
-
-                  else
-                  {
-                     r = back_r;
-                     g = back_g;
-                     b = back_g;
-                  }
-
-                  /* Compare the newly-created color-map entry with the one the
-                   * PNG_CMAP_RGB algorithm will use.  If the two entries don't
-                   * match, add the new one and set this as the background
-                   * index.
-                   */
-                  if (memcmp((png_const_bytep)display->colormap +
-                      sample_size * cmap_entries,
-                      (png_const_bytep)display->colormap +
-                          sample_size * PNG_RGB_INDEX(r,g,b),
-                     sample_size) != 0)
-                  {
-                     /* The background color must be added. */
-                     background_index = cmap_entries++;
-
-                     /* Add 27 r,g,b entries each with created by composing with
-                      * the background at alpha 0.5.
-                      */
-                     for (r=0; r<256; r = (r << 1) | 0x7f)
-                     {
-                        for (g=0; g<256; g = (g << 1) | 0x7f)
-                        {
-                           /* This generates components with the values 0, 127
-                            * and 255
-                            */
-                           for (b=0; b<256; b = (b << 1) | 0x7f)
-                              png_create_colormap_entry(display, cmap_entries++,
-                                  png_colormap_compose(display, r, P_sRGB, 128,
-                                      back_r, output_encoding),
-                                  png_colormap_compose(display, g, P_sRGB, 128,
-                                      back_g, output_encoding),
-                                  png_colormap_compose(display, b, P_sRGB, 128,
-                                      back_b, output_encoding),
-                                  0/*unused*/, output_encoding);
-                        }
-                     }
-
-                     expand_tRNS = 1;
-                     output_processing = PNG_CMAP_RGB_ALPHA;
-                  }
-
-                  else /* background color is in the standard color-map */
-                  {
-                     png_color_16 c;
-
-                     c.index = 0; /*unused*/
-                     c.red = (png_uint_16)back_r;
-                     c.gray = c.green = (png_uint_16)back_g;
-                     c.blue = (png_uint_16)back_b;
-
-                     png_set_background_fixed(png_ptr, &c,
-                         PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
-                         0/*gamma: not used*/);
-
-                     output_processing = PNG_CMAP_RGB;
-                  }
-               }
-            }
-
-            else /* no alpha or transparency in the input */
-            {
-               /* Alpha in the output is irrelevant, simply map the opaque input
-                * pixels to the 6x6x6 color-map.
-                */
-               if (PNG_RGB_COLORMAP_ENTRIES > image->colormap_entries)
-                  png_error(png_ptr, "rgb color-map: too few entries");
-
-               cmap_entries = (unsigned int)make_rgb_colormap(display);
-               output_processing = PNG_CMAP_RGB;
-            }
-         }
-         break;
-
-      case PNG_COLOR_TYPE_PALETTE:
-         /* It's already got a color-map.  It may be necessary to eliminate the
-          * tRNS entries though.
-          */
-         {
-            unsigned int num_trans = png_ptr->num_trans;
-            png_const_bytep trans = num_trans > 0 ? png_ptr->trans_alpha : NULL;
-            png_const_colorp colormap = png_ptr->palette;
-            int do_background = trans != NULL &&
-               (output_format & PNG_FORMAT_FLAG_ALPHA) == 0;
-            unsigned int i;
-
-            /* Just in case: */
-            if (trans == NULL)
-               num_trans = 0;
-
-            output_processing = PNG_CMAP_NONE;
-            data_encoding = P_FILE; /* Don't change from color-map indices */
-            cmap_entries = (unsigned int)png_ptr->num_palette;
-            if (cmap_entries > 256)
-               cmap_entries = 256;
-
-            if (cmap_entries > (unsigned int)image->colormap_entries)
-               png_error(png_ptr, "palette color-map: too few entries");
-
-            for (i=0; i < cmap_entries; ++i)
-            {
-               if (do_background != 0 && i < num_trans && trans[i] < 255)
-               {
-                  if (trans[i] == 0)
-                     png_create_colormap_entry(display, i, back_r, back_g,
-                         back_b, 0, output_encoding);
-
-                  else
-                  {
-                     /* Must compose the PNG file color in the color-map entry
-                      * on the sRGB color in 'back'.
-                      */
-                     png_create_colormap_entry(display, i,
-                         png_colormap_compose(display, colormap[i].red,
-                             P_FILE, trans[i], back_r, output_encoding),
-                         png_colormap_compose(display, colormap[i].green,
-                             P_FILE, trans[i], back_g, output_encoding),
-                         png_colormap_compose(display, colormap[i].blue,
-                             P_FILE, trans[i], back_b, output_encoding),
-                         output_encoding == P_LINEAR ? trans[i] * 257U :
-                             trans[i],
-                         output_encoding);
-                  }
-               }
-
-               else
-                  png_create_colormap_entry(display, i, colormap[i].red,
-                      colormap[i].green, colormap[i].blue,
-                      i < num_trans ? trans[i] : 255U, P_FILE/*8-bit*/);
-            }
-
-            /* The PNG data may have indices packed in fewer than 8 bits, it
-             * must be expanded if so.
-             */
-            if (png_ptr->bit_depth < 8)
-               png_set_packing(png_ptr);
-         }
-         break;
-
-      default:
-         png_error(png_ptr, "invalid PNG color type");
-         /*NOT REACHED*/
-   }
-
-   /* Now deal with the output processing */
-   if (expand_tRNS != 0 && png_ptr->num_trans > 0 &&
-       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) == 0)
-      png_set_tRNS_to_alpha(png_ptr);
-
-   switch (data_encoding)
-   {
-      case P_sRGB:
-         /* Change to 8-bit sRGB */
-         png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, PNG_GAMMA_sRGB);
-         /* FALLTHROUGH */
-
-      case P_FILE:
-         if (png_ptr->bit_depth > 8)
-            png_set_scale_16(png_ptr);
-         break;
-
-#ifdef __GNUC__
-      default:
-         png_error(png_ptr, "bad data option (internal error)");
-#endif
-   }
-
-   if (cmap_entries > 256 || cmap_entries > image->colormap_entries)
-      png_error(png_ptr, "color map overflow (BAD internal error)");
-
-   image->colormap_entries = cmap_entries;
-
-   /* Double check using the recorded background index */
-   switch (output_processing)
-   {
-      case PNG_CMAP_NONE:
-         if (background_index != PNG_CMAP_NONE_BACKGROUND)
-            goto bad_background;
-         break;
-
-      case PNG_CMAP_GA:
-         if (background_index != PNG_CMAP_GA_BACKGROUND)
-            goto bad_background;
-         break;
-
-      case PNG_CMAP_TRANS:
-         if (background_index >= cmap_entries ||
-            background_index != PNG_CMAP_TRANS_BACKGROUND)
-            goto bad_background;
-         break;
-
-      case PNG_CMAP_RGB:
-         if (background_index != PNG_CMAP_RGB_BACKGROUND)
-            goto bad_background;
-         break;
-
-      case PNG_CMAP_RGB_ALPHA:
-         if (background_index != PNG_CMAP_RGB_ALPHA_BACKGROUND)
-            goto bad_background;
-         break;
-
-      default:
-         png_error(png_ptr, "bad processing option (internal error)");
-
-      bad_background:
-         png_error(png_ptr, "bad background index (internal error)");
-   }
-
-   display->colormap_processing = (int)output_processing;
-
-   return 1/*ok*/;
-}
-
-/* The final part of the color-map read called from png_image_finish_read. */
-static int
-png_image_read_and_map(png_voidp argument)
-{
-   png_image_read_control *display = png_voidcast(png_image_read_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-   int passes;
-
-   /* Called when the libpng data must be transformed into the color-mapped
-    * form.  There is a local row buffer in display->local and this routine must
-    * do the interlace handling.
-    */
-   switch (png_ptr->interlaced)
-   {
-      case PNG_INTERLACE_NONE:
-         passes = 1;
-         break;
-
-      case PNG_INTERLACE_ADAM7:
-         passes = PNG_INTERLACE_ADAM7_PASSES;
-         break;
-
-      default:
-         png_error(png_ptr, "unknown interlace type");
-   }
-
-   {
-      png_uint_32  height = image->height;
-      png_uint_32  width = image->width;
-      int          proc = display->colormap_processing;
-      png_bytep    first_row = png_voidcast(png_bytep, display->first_row);
-      ptrdiff_t    step_row = display->row_bytes;
-      int pass;
-
-      for (pass = 0; pass < passes; ++pass)
-      {
-         unsigned int     startx, stepx, stepy;
-         png_uint_32      y;
-
-         if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
-         {
-            /* The row may be empty for a short image: */
-            if (PNG_PASS_COLS(width, pass) == 0)
-               continue;
-
-            startx = PNG_PASS_START_COL(pass);
-            stepx = PNG_PASS_COL_OFFSET(pass);
-            y = PNG_PASS_START_ROW(pass);
-            stepy = PNG_PASS_ROW_OFFSET(pass);
-         }
-
-         else
-         {
-            y = 0;
-            startx = 0;
-            stepx = stepy = 1;
-         }
-
-         for (; y<height; y += stepy)
-         {
-            png_bytep inrow = png_voidcast(png_bytep, display->local_row);
-            png_bytep outrow = first_row + y * step_row;
-            png_const_bytep end_row = outrow + width;
-
-            /* Read read the libpng data into the temporary buffer. */
-            png_read_row(png_ptr, inrow, NULL);
-
-            /* Now process the row according to the processing option, note
-             * that the caller verifies that the format of the libpng output
-             * data is as required.
-             */
-            outrow += startx;
-            switch (proc)
-            {
-               case PNG_CMAP_GA:
-                  for (; outrow < end_row; outrow += stepx)
-                  {
-                     /* The data is always in the PNG order */
-                     unsigned int gray = *inrow++;
-                     unsigned int alpha = *inrow++;
-                     unsigned int entry;
-
-                     /* NOTE: this code is copied as a comment in
-                      * make_ga_colormap above.  Please update the
-                      * comment if you change this code!
-                      */
-                     if (alpha > 229) /* opaque */
-                     {
-                        entry = (231 * gray + 128) >> 8;
-                     }
-                     else if (alpha < 26) /* transparent */
-                     {
-                        entry = 231;
-                     }
-                     else /* partially opaque */
-                     {
-                        entry = 226 + 6 * PNG_DIV51(alpha) + PNG_DIV51(gray);
-                     }
-
-                     *outrow = (png_byte)entry;
-                  }
-                  break;
-
-               case PNG_CMAP_TRANS:
-                  for (; outrow < end_row; outrow += stepx)
-                  {
-                     png_byte gray = *inrow++;
-                     png_byte alpha = *inrow++;
-
-                     if (alpha == 0)
-                        *outrow = PNG_CMAP_TRANS_BACKGROUND;
-
-                     else if (gray != PNG_CMAP_TRANS_BACKGROUND)
-                        *outrow = gray;
-
-                     else
-                        *outrow = (png_byte)(PNG_CMAP_TRANS_BACKGROUND+1);
-                  }
-                  break;
-
-               case PNG_CMAP_RGB:
-                  for (; outrow < end_row; outrow += stepx)
-                  {
-                     *outrow = PNG_RGB_INDEX(inrow[0], inrow[1], inrow[2]);
-                     inrow += 3;
-                  }
-                  break;
-
-               case PNG_CMAP_RGB_ALPHA:
-                  for (; outrow < end_row; outrow += stepx)
-                  {
-                     unsigned int alpha = inrow[3];
-
-                     /* Because the alpha entries only hold alpha==0.5 values
-                      * split the processing at alpha==0.25 (64) and 0.75
-                      * (196).
-                      */
-
-                     if (alpha >= 196)
-                        *outrow = PNG_RGB_INDEX(inrow[0], inrow[1],
-                            inrow[2]);
-
-                     else if (alpha < 64)
-                        *outrow = PNG_CMAP_RGB_ALPHA_BACKGROUND;
-
-                     else
-                     {
-                        /* Likewise there are three entries for each of r, g
-                         * and b.  We could select the entry by popcount on
-                         * the top two bits on those architectures that
-                         * support it, this is what the code below does,
-                         * crudely.
-                         */
-                        unsigned int back_i = PNG_CMAP_RGB_ALPHA_BACKGROUND+1;
-
-                        /* Here are how the values map:
-                         *
-                         * 0x00 .. 0x3f -> 0
-                         * 0x40 .. 0xbf -> 1
-                         * 0xc0 .. 0xff -> 2
-                         *
-                         * So, as above with the explicit alpha checks, the
-                         * breakpoints are at 64 and 196.
-                         */
-                        if (inrow[0] & 0x80) back_i += 9; /* red */
-                        if (inrow[0] & 0x40) back_i += 9;
-                        if (inrow[0] & 0x80) back_i += 3; /* green */
-                        if (inrow[0] & 0x40) back_i += 3;
-                        if (inrow[0] & 0x80) back_i += 1; /* blue */
-                        if (inrow[0] & 0x40) back_i += 1;
-
-                        *outrow = (png_byte)back_i;
-                     }
-
-                     inrow += 4;
-                  }
-                  break;
-
-               default:
-                  break;
-            }
-         }
-      }
-   }
-
-   return 1;
-}
-
-static int
-png_image_read_colormapped(png_voidp argument)
-{
-   png_image_read_control *display = png_voidcast(png_image_read_control*,
-       argument);
-   png_imagep image = display->image;
-   png_controlp control = image->opaque;
-   png_structrp png_ptr = control->png_ptr;
-   png_inforp info_ptr = control->info_ptr;
-
-   int passes = 0; /* As a flag */
-
-   PNG_SKIP_CHUNKS(png_ptr);
-
-   /* Update the 'info' structure and make sure the result is as required; first
-    * make sure to turn on the interlace handling if it will be required
-    * (because it can't be turned on *after* the call to png_read_update_info!)
-    */
-   if (display->colormap_processing == PNG_CMAP_NONE)
-      passes = png_set_interlace_handling(png_ptr);
-
-   png_read_update_info(png_ptr, info_ptr);
-
-   /* The expected output can be deduced from the colormap_processing option. */
-   switch (display->colormap_processing)
-   {
-      case PNG_CMAP_NONE:
-         /* Output must be one channel and one byte per pixel, the output
-          * encoding can be anything.
-          */
-         if ((info_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
-            info_ptr->color_type == PNG_COLOR_TYPE_GRAY) &&
-            info_ptr->bit_depth == 8)
-            break;
-
-         goto bad_output;
-
-      case PNG_CMAP_TRANS:
-      case PNG_CMAP_GA:
-         /* Output must be two channels and the 'G' one must be sRGB, the latter
-          * can be checked with an exact number because it should have been set
-          * to this number above!
-          */
-         if (info_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
-            info_ptr->bit_depth == 8 &&
-            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
-            image->colormap_entries == 256)
-            break;
-
-         goto bad_output;
-
-      case PNG_CMAP_RGB:
-         /* Output must be 8-bit sRGB encoded RGB */
-         if (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
-            info_ptr->bit_depth == 8 &&
-            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
-            image->colormap_entries == 216)
-            break;
-
-         goto bad_output;
-
-      case PNG_CMAP_RGB_ALPHA:
-         /* Output must be 8-bit sRGB encoded RGBA */
-         if (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
-            info_ptr->bit_depth == 8 &&
-            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
-            image->colormap_entries == 244 /* 216 + 1 + 27 */)
-            break;
-
-         goto bad_output;
-
-      default:
-      bad_output:
-         png_error(png_ptr, "bad color-map processing (internal error)");
-   }
-
-   /* Now read the rows.  Do this here if it is possible to read directly into
-    * the output buffer, otherwise allocate a local row buffer of the maximum
-    * size libpng requires and call the relevant processing routine safely.
-    */
-   {
-      png_voidp first_row = display->buffer;
-      ptrdiff_t row_bytes = display->row_stride;
-
-      /* The following expression is designed to work correctly whether it gives
-       * a signed or an unsigned result.
-       */
-      if (row_bytes < 0)
-      {
-         char *ptr = png_voidcast(char*, first_row);
-         ptr += (image->height-1) * (-row_bytes);
-         first_row = png_voidcast(png_voidp, ptr);
-      }
-
-      display->first_row = first_row;
-      display->row_bytes = row_bytes;
-   }
-
-   if (passes == 0)
-   {
-      int result;
-      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
-
-      display->local_row = row;
-      result = png_safe_execute(image, png_image_read_and_map, display);
-      display->local_row = NULL;
-      png_free(png_ptr, row);
-
-      return result;
-   }
-
-   else
-   {
-      png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes;
-
-      while (--passes >= 0)
-      {
-         png_uint_32      y = image->height;
-         png_bytep        row = png_voidcast(png_bytep, display->first_row);
-
-         for (; y > 0; --y)
-         {
-            png_read_row(png_ptr, row, NULL);
-            row += row_bytes;
-         }
-      }
-
-      return 1;
-   }
-}
-
-/* Just the row reading part of png_image_read. */
-static int
-png_image_read_composite(png_voidp argument)
-{
-   png_image_read_control *display = png_voidcast(png_image_read_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-   int passes;
-
-   switch (png_ptr->interlaced)
-   {
-      case PNG_INTERLACE_NONE:
-         passes = 1;
-         break;
-
-      case PNG_INTERLACE_ADAM7:
-         passes = PNG_INTERLACE_ADAM7_PASSES;
-         break;
-
-      default:
-         png_error(png_ptr, "unknown interlace type");
-   }
-
-   {
-      png_uint_32  height = image->height;
-      png_uint_32  width = image->width;
-      ptrdiff_t    step_row = display->row_bytes;
-      unsigned int channels =
-          (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? 3 : 1;
-      int pass;
-
-      for (pass = 0; pass < passes; ++pass)
-      {
-         unsigned int     startx, stepx, stepy;
-         png_uint_32      y;
-
-         if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
-         {
-            /* The row may be empty for a short image: */
-            if (PNG_PASS_COLS(width, pass) == 0)
-               continue;
-
-            startx = PNG_PASS_START_COL(pass) * channels;
-            stepx = PNG_PASS_COL_OFFSET(pass) * channels;
-            y = PNG_PASS_START_ROW(pass);
-            stepy = PNG_PASS_ROW_OFFSET(pass);
-         }
-
-         else
-         {
-            y = 0;
-            startx = 0;
-            stepx = channels;
-            stepy = 1;
-         }
-
-         for (; y<height; y += stepy)
-         {
-            png_bytep inrow = png_voidcast(png_bytep, display->local_row);
-            png_bytep outrow;
-            png_const_bytep end_row;
-
-            /* Read the row, which is packed: */
-            png_read_row(png_ptr, inrow, NULL);
-
-            outrow = png_voidcast(png_bytep, display->first_row);
-            outrow += y * step_row;
-            end_row = outrow + width * channels;
-
-            /* Now do the composition on each pixel in this row. */
-            outrow += startx;
-            for (; outrow < end_row; outrow += stepx)
-            {
-               png_byte alpha = inrow[channels];
-
-               if (alpha > 0) /* else no change to the output */
-               {
-                  unsigned int c;
-
-                  for (c=0; c<channels; ++c)
-                  {
-                     png_uint_32 component = inrow[c];
-
-                     if (alpha < 255) /* else just use component */
-                     {
-                        /* This is PNG_OPTIMIZED_ALPHA, the component value
-                         * is a linear 8-bit value.  Combine this with the
-                         * current outrow[c] value which is sRGB encoded.
-                         * Arithmetic here is 16-bits to preserve the output
-                         * values correctly.
-                         */
-                        component *= 257*255; /* =65535 */
-                        component += (255-alpha)*png_sRGB_table[outrow[c]];
-
-                        /* So 'component' is scaled by 255*65535 and is
-                         * therefore appropriate for the sRGB to linear
-                         * conversion table.
-                         */
-                        component = PNG_sRGB_FROM_LINEAR(component);
-                     }
-
-                     outrow[c] = (png_byte)component;
-                  }
-               }
-
-               inrow += channels+1; /* components and alpha channel */
-            }
-         }
-      }
-   }
-
-   return 1;
-}
-
-/* The do_local_background case; called when all the following transforms are to
- * be done:
- *
- * PNG_RGB_TO_GRAY
- * PNG_COMPOSITE
- * PNG_GAMMA
- *
- * This is a work-around for the fact that both the PNG_RGB_TO_GRAY and
- * PNG_COMPOSITE code performs gamma correction, so we get double gamma
- * correction.  The fix-up is to prevent the PNG_COMPOSITE operation from
- * happening inside libpng, so this routine sees an 8 or 16-bit gray+alpha
- * row and handles the removal or pre-multiplication of the alpha channel.
- */
-static int
-png_image_read_background(png_voidp argument)
-{
-   png_image_read_control *display = png_voidcast(png_image_read_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-   png_inforp info_ptr = image->opaque->info_ptr;
-   png_uint_32 height = image->height;
-   png_uint_32 width = image->width;
-   int pass, passes;
-
-   /* Double check the convoluted logic below.  We expect to get here with
-    * libpng doing rgb to gray and gamma correction but background processing
-    * left to the png_image_read_background function.  The rows libpng produce
-    * might be 8 or 16-bit but should always have two channels; gray plus alpha.
-    */
-   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == 0)
-      png_error(png_ptr, "lost rgb to gray");
-
-   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-      png_error(png_ptr, "unexpected compose");
-
-   if (png_get_channels(png_ptr, info_ptr) != 2)
-      png_error(png_ptr, "lost/gained channels");
-
-   /* Expect the 8-bit case to always remove the alpha channel */
-   if ((image->format & PNG_FORMAT_FLAG_LINEAR) == 0 &&
-      (image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
-      png_error(png_ptr, "unexpected 8-bit transformation");
-
-   switch (png_ptr->interlaced)
-   {
-      case PNG_INTERLACE_NONE:
-         passes = 1;
-         break;
-
-      case PNG_INTERLACE_ADAM7:
-         passes = PNG_INTERLACE_ADAM7_PASSES;
-         break;
-
-      default:
-         png_error(png_ptr, "unknown interlace type");
-   }
-
-   /* Use direct access to info_ptr here because otherwise the simplified API
-    * would require PNG_EASY_ACCESS_SUPPORTED (just for this.)  Note this is
-    * checking the value after libpng expansions, not the original value in the
-    * PNG.
-    */
-   switch (info_ptr->bit_depth)
-   {
-      case 8:
-         /* 8-bit sRGB gray values with an alpha channel; the alpha channel is
-          * to be removed by composing on a background: either the row if
-          * display->background is NULL or display->background->green if not.
-          * Unlike the code above ALPHA_OPTIMIZED has *not* been done.
-          */
-         {
-            png_bytep first_row = png_voidcast(png_bytep, display->first_row);
-            ptrdiff_t step_row = display->row_bytes;
-
-            for (pass = 0; pass < passes; ++pass)
-            {
-               unsigned int     startx, stepx, stepy;
-               png_uint_32      y;
-
-               if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
-               {
-                  /* The row may be empty for a short image: */
-                  if (PNG_PASS_COLS(width, pass) == 0)
-                     continue;
-
-                  startx = PNG_PASS_START_COL(pass);
-                  stepx = PNG_PASS_COL_OFFSET(pass);
-                  y = PNG_PASS_START_ROW(pass);
-                  stepy = PNG_PASS_ROW_OFFSET(pass);
-               }
-
-               else
-               {
-                  y = 0;
-                  startx = 0;
-                  stepx = stepy = 1;
-               }
-
-               if (display->background == NULL)
-               {
-                  for (; y<height; y += stepy)
-                  {
-                     png_bytep inrow = png_voidcast(png_bytep,
-                         display->local_row);
-                     png_bytep outrow = first_row + y * step_row;
-                     png_const_bytep end_row = outrow + width;
-
-                     /* Read the row, which is packed: */
-                     png_read_row(png_ptr, inrow, NULL);
-
-                     /* Now do the composition on each pixel in this row. */
-                     outrow += startx;
-                     for (; outrow < end_row; outrow += stepx)
-                     {
-                        png_byte alpha = inrow[1];
-
-                        if (alpha > 0) /* else no change to the output */
-                        {
-                           png_uint_32 component = inrow[0];
-
-                           if (alpha < 255) /* else just use component */
-                           {
-                              /* Since PNG_OPTIMIZED_ALPHA was not set it is
-                               * necessary to invert the sRGB transfer
-                               * function and multiply the alpha out.
-                               */
-                              component = png_sRGB_table[component] * alpha;
-                              component += png_sRGB_table[outrow[0]] *
-                                 (255-alpha);
-                              component = PNG_sRGB_FROM_LINEAR(component);
-                           }
-
-                           outrow[0] = (png_byte)component;
-                        }
-
-                        inrow += 2; /* gray and alpha channel */
-                     }
-                  }
-               }
-
-               else /* constant background value */
-               {
-                  png_byte background8 = display->background->green;
-                  png_uint_16 background = png_sRGB_table[background8];
-
-                  for (; y<height; y += stepy)
-                  {
-                     png_bytep inrow = png_voidcast(png_bytep,
-                         display->local_row);
-                     png_bytep outrow = first_row + y * step_row;
-                     png_const_bytep end_row = outrow + width;
-
-                     /* Read the row, which is packed: */
-                     png_read_row(png_ptr, inrow, NULL);
-
-                     /* Now do the composition on each pixel in this row. */
-                     outrow += startx;
-                     for (; outrow < end_row; outrow += stepx)
-                     {
-                        png_byte alpha = inrow[1];
-
-                        if (alpha > 0) /* else use background */
-                        {
-                           png_uint_32 component = inrow[0];
-
-                           if (alpha < 255) /* else just use component */
-                           {
-                              component = png_sRGB_table[component] * alpha;
-                              component += background * (255-alpha);
-                              component = PNG_sRGB_FROM_LINEAR(component);
-                           }
-
-                           outrow[0] = (png_byte)component;
-                        }
-
-                        else
-                           outrow[0] = background8;
-
-                        inrow += 2; /* gray and alpha channel */
-                     }
-                  }
-               }
-            }
-         }
-         break;
-
-      case 16:
-         /* 16-bit linear with pre-multiplied alpha; the pre-multiplication must
-          * still be done and, maybe, the alpha channel removed.  This code also
-          * handles the alpha-first option.
-          */
-         {
-            png_uint_16p first_row = png_voidcast(png_uint_16p,
-                display->first_row);
-            /* The division by two is safe because the caller passed in a
-             * stride which was multiplied by 2 (below) to get row_bytes.
-             */
-            ptrdiff_t    step_row = display->row_bytes / 2;
-            unsigned int preserve_alpha = (image->format &
-                PNG_FORMAT_FLAG_ALPHA) != 0;
-            unsigned int outchannels = 1U+preserve_alpha;
-            int swap_alpha = 0;
-
-#           ifdef PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED
-               if (preserve_alpha != 0 &&
-                   (image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
-                  swap_alpha = 1;
-#           endif
-
-            for (pass = 0; pass < passes; ++pass)
-            {
-               unsigned int     startx, stepx, stepy;
-               png_uint_32      y;
-
-               /* The 'x' start and step are adjusted to output components here.
-                */
-               if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
-               {
-                  /* The row may be empty for a short image: */
-                  if (PNG_PASS_COLS(width, pass) == 0)
-                     continue;
-
-                  startx = PNG_PASS_START_COL(pass) * outchannels;
-                  stepx = PNG_PASS_COL_OFFSET(pass) * outchannels;
-                  y = PNG_PASS_START_ROW(pass);
-                  stepy = PNG_PASS_ROW_OFFSET(pass);
-               }
-
-               else
-               {
-                  y = 0;
-                  startx = 0;
-                  stepx = outchannels;
-                  stepy = 1;
-               }
-
-               for (; y<height; y += stepy)
-               {
-                  png_const_uint_16p inrow;
-                  png_uint_16p outrow = first_row + y*step_row;
-                  png_uint_16p end_row = outrow + width * outchannels;
-
-                  /* Read the row, which is packed: */
-                  png_read_row(png_ptr, png_voidcast(png_bytep,
-                      display->local_row), NULL);
-                  inrow = png_voidcast(png_const_uint_16p, display->local_row);
-
-                  /* Now do the pre-multiplication on each pixel in this row.
-                   */
-                  outrow += startx;
-                  for (; outrow < end_row; outrow += stepx)
-                  {
-                     png_uint_32 component = inrow[0];
-                     png_uint_16 alpha = inrow[1];
-
-                     if (alpha > 0) /* else 0 */
-                     {
-                        if (alpha < 65535) /* else just use component */
-                        {
-                           component *= alpha;
-                           component += 32767;
-                           component /= 65535;
-                        }
-                     }
-
-                     else
-                        component = 0;
-
-                     outrow[swap_alpha] = (png_uint_16)component;
-                     if (preserve_alpha != 0)
-                        outrow[1 ^ swap_alpha] = alpha;
-
-                     inrow += 2; /* components and alpha channel */
-                  }
-               }
-            }
-         }
-         break;
-
-#ifdef __GNUC__
-      default:
-         png_error(png_ptr, "unexpected bit depth");
-#endif
-   }
-
-   return 1;
-}
-
-/* The guts of png_image_finish_read as a png_safe_execute callback. */
-static int
-png_image_read_direct(png_voidp argument)
-{
-   png_image_read_control *display = png_voidcast(png_image_read_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-   png_inforp info_ptr = image->opaque->info_ptr;
-
-   png_uint_32 format = image->format;
-   int linear = (format & PNG_FORMAT_FLAG_LINEAR) != 0;
-   int do_local_compose = 0;
-   int do_local_background = 0; /* to avoid double gamma correction bug */
-   int passes = 0;
-
-   /* Add transforms to ensure the correct output format is produced then check
-    * that the required implementation support is there.  Always expand; always
-    * need 8 bits minimum, no palette and expanded tRNS.
-    */
-   png_set_expand(png_ptr);
-
-   /* Now check the format to see if it was modified. */
-   {
-      png_uint_32 base_format = png_image_format(png_ptr) &
-         ~PNG_FORMAT_FLAG_COLORMAP /* removed by png_set_expand */;
-      png_uint_32 change = format ^ base_format;
-      png_fixed_point output_gamma;
-      int mode; /* alpha mode */
-
-      /* Do this first so that we have a record if rgb to gray is happening. */
-      if ((change & PNG_FORMAT_FLAG_COLOR) != 0)
-      {
-         /* gray<->color transformation required. */
-         if ((format & PNG_FORMAT_FLAG_COLOR) != 0)
-            png_set_gray_to_rgb(png_ptr);
-
-         else
-         {
-            /* libpng can't do both rgb to gray and
-             * background/pre-multiplication if there is also significant gamma
-             * correction, because both operations require linear colors and
-             * the code only supports one transform doing the gamma correction.
-             * Handle this by doing the pre-multiplication or background
-             * operation in this code, if necessary.
-             *
-             * TODO: fix this by rewriting pngrtran.c (!)
-             *
-             * For the moment (given that fixing this in pngrtran.c is an
-             * enormous change) 'do_local_background' is used to indicate that
-             * the problem exists.
-             */
-            if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-               do_local_background = 1/*maybe*/;
-
-            png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE,
-                PNG_RGB_TO_GRAY_DEFAULT, PNG_RGB_TO_GRAY_DEFAULT);
-         }
-
-         change &= ~PNG_FORMAT_FLAG_COLOR;
-      }
-
-      /* Set the gamma appropriately, linear for 16-bit input, sRGB otherwise.
-       */
-      {
-         png_fixed_point input_gamma_default;
-
-         if ((base_format & PNG_FORMAT_FLAG_LINEAR) != 0 &&
-             (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0)
-            input_gamma_default = PNG_GAMMA_LINEAR;
-         else
-            input_gamma_default = PNG_DEFAULT_sRGB;
-
-         /* Call png_set_alpha_mode to set the default for the input gamma; the
-          * output gamma is set by a second call below.
-          */
-         png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, input_gamma_default);
-      }
-
-      if (linear != 0)
-      {
-         /* If there *is* an alpha channel in the input it must be multiplied
-          * out; use PNG_ALPHA_STANDARD, otherwise just use PNG_ALPHA_PNG.
-          */
-         if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-            mode = PNG_ALPHA_STANDARD; /* associated alpha */
-
-         else
-            mode = PNG_ALPHA_PNG;
-
-         output_gamma = PNG_GAMMA_LINEAR;
-      }
-
-      else
-      {
-         mode = PNG_ALPHA_PNG;
-         output_gamma = PNG_DEFAULT_sRGB;
-      }
-
-      if ((change & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0)
-      {
-         mode = PNG_ALPHA_OPTIMIZED;
-         change &= ~PNG_FORMAT_FLAG_ASSOCIATED_ALPHA;
-      }
-
-      /* If 'do_local_background' is set check for the presence of gamma
-       * correction; this is part of the work-round for the libpng bug
-       * described above.
-       *
-       * TODO: fix libpng and remove this.
-       */
-      if (do_local_background != 0)
-      {
-         png_fixed_point gtest;
-
-         /* This is 'png_gamma_threshold' from pngrtran.c; the test used for
-          * gamma correction, the screen gamma hasn't been set on png_struct
-          * yet; it's set below.  png_struct::gamma, however, is set to the
-          * final value.
-          */
-         if (png_muldiv(&gtest, output_gamma, png_ptr->colorspace.gamma,
-             PNG_FP_1) != 0 && png_gamma_significant(gtest) == 0)
-            do_local_background = 0;
-
-         else if (mode == PNG_ALPHA_STANDARD)
-         {
-            do_local_background = 2/*required*/;
-            mode = PNG_ALPHA_PNG; /* prevent libpng doing it */
-         }
-
-         /* else leave as 1 for the checks below */
-      }
-
-      /* If the bit-depth changes then handle that here. */
-      if ((change & PNG_FORMAT_FLAG_LINEAR) != 0)
-      {
-         if (linear != 0 /*16-bit output*/)
-            png_set_expand_16(png_ptr);
-
-         else /* 8-bit output */
-            png_set_scale_16(png_ptr);
-
-         change &= ~PNG_FORMAT_FLAG_LINEAR;
-      }
-
-      /* Now the background/alpha channel changes. */
-      if ((change & PNG_FORMAT_FLAG_ALPHA) != 0)
-      {
-         /* Removing an alpha channel requires composition for the 8-bit
-          * formats; for the 16-bit it is already done, above, by the
-          * pre-multiplication and the channel just needs to be stripped.
-          */
-         if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
-         {
-            /* If RGB->gray is happening the alpha channel must be left and the
-             * operation completed locally.
-             *
-             * TODO: fix libpng and remove this.
-             */
-            if (do_local_background != 0)
-               do_local_background = 2/*required*/;
-
-            /* 16-bit output: just remove the channel */
-            else if (linear != 0) /* compose on black (well, pre-multiply) */
-               png_set_strip_alpha(png_ptr);
-
-            /* 8-bit output: do an appropriate compose */
-            else if (display->background != NULL)
-            {
-               png_color_16 c;
-
-               c.index = 0; /*unused*/
-               c.red = display->background->red;
-               c.green = display->background->green;
-               c.blue = display->background->blue;
-               c.gray = display->background->green;
-
-               /* This is always an 8-bit sRGB value, using the 'green' channel
-                * for gray is much better than calculating the luminance here;
-                * we can get off-by-one errors in that calculation relative to
-                * the app expectations and that will show up in transparent
-                * pixels.
-                */
-               png_set_background_fixed(png_ptr, &c,
-                   PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
-                   0/*gamma: not used*/);
-            }
-
-            else /* compose on row: implemented below. */
-            {
-               do_local_compose = 1;
-               /* This leaves the alpha channel in the output, so it has to be
-                * removed by the code below.  Set the encoding to the 'OPTIMIZE'
-                * one so the code only has to hack on the pixels that require
-                * composition.
-                */
-               mode = PNG_ALPHA_OPTIMIZED;
-            }
-         }
-
-         else /* output needs an alpha channel */
-         {
-            /* This is tricky because it happens before the swap operation has
-             * been accomplished; however, the swap does *not* swap the added
-             * alpha channel (weird API), so it must be added in the correct
-             * place.
-             */
-            png_uint_32 filler; /* opaque filler */
-            int where;
-
-            if (linear != 0)
-               filler = 65535;
-
-            else
-               filler = 255;
-
-#ifdef PNG_FORMAT_AFIRST_SUPPORTED
-            if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
-            {
-               where = PNG_FILLER_BEFORE;
-               change &= ~PNG_FORMAT_FLAG_AFIRST;
-            }
-
-            else
-#endif
-            where = PNG_FILLER_AFTER;
-
-            png_set_add_alpha(png_ptr, filler, where);
-         }
-
-         /* This stops the (irrelevant) call to swap_alpha below. */
-         change &= ~PNG_FORMAT_FLAG_ALPHA;
-      }
-
-      /* Now set the alpha mode correctly; this is always done, even if there is
-       * no alpha channel in either the input or the output because it correctly
-       * sets the output gamma.
-       */
-      png_set_alpha_mode_fixed(png_ptr, mode, output_gamma);
-
-#     ifdef PNG_FORMAT_BGR_SUPPORTED
-         if ((change & PNG_FORMAT_FLAG_BGR) != 0)
-         {
-            /* Check only the output format; PNG is never BGR; don't do this if
-             * the output is gray, but fix up the 'format' value in that case.
-             */
-            if ((format & PNG_FORMAT_FLAG_COLOR) != 0)
-               png_set_bgr(png_ptr);
-
-            else
-               format &= ~PNG_FORMAT_FLAG_BGR;
-
-            change &= ~PNG_FORMAT_FLAG_BGR;
-         }
-#     endif
-
-#     ifdef PNG_FORMAT_AFIRST_SUPPORTED
-         if ((change & PNG_FORMAT_FLAG_AFIRST) != 0)
-         {
-            /* Only relevant if there is an alpha channel - it's particularly
-             * important to handle this correctly because do_local_compose may
-             * be set above and then libpng will keep the alpha channel for this
-             * code to remove.
-             */
-            if ((format & PNG_FORMAT_FLAG_ALPHA) != 0)
-            {
-               /* Disable this if doing a local background,
-                * TODO: remove this when local background is no longer required.
-                */
-               if (do_local_background != 2)
-                  png_set_swap_alpha(png_ptr);
-            }
-
-            else
-               format &= ~PNG_FORMAT_FLAG_AFIRST;
-
-            change &= ~PNG_FORMAT_FLAG_AFIRST;
-         }
-#     endif
-
-      /* If the *output* is 16-bit then we need to check for a byte-swap on this
-       * architecture.
-       */
-      if (linear != 0)
-      {
-         png_uint_16 le = 0x0001;
-
-         if ((*(png_const_bytep) & le) != 0)
-            png_set_swap(png_ptr);
-      }
-
-      /* If change is not now 0 some transformation is missing - error out. */
-      if (change != 0)
-         png_error(png_ptr, "png_read_image: unsupported transformation");
-   }
-
-   PNG_SKIP_CHUNKS(png_ptr);
-
-   /* Update the 'info' structure and make sure the result is as required; first
-    * make sure to turn on the interlace handling if it will be required
-    * (because it can't be turned on *after* the call to png_read_update_info!)
-    *
-    * TODO: remove the do_local_background fixup below.
-    */
-   if (do_local_compose == 0 && do_local_background != 2)
-      passes = png_set_interlace_handling(png_ptr);
-
-   png_read_update_info(png_ptr, info_ptr);
-
-   {
-      png_uint_32 info_format = 0;
-
-      if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-         info_format |= PNG_FORMAT_FLAG_COLOR;
-
-      if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      {
-         /* do_local_compose removes this channel below. */
-         if (do_local_compose == 0)
-         {
-            /* do_local_background does the same if required. */
-            if (do_local_background != 2 ||
-               (format & PNG_FORMAT_FLAG_ALPHA) != 0)
-               info_format |= PNG_FORMAT_FLAG_ALPHA;
-         }
-      }
-
-      else if (do_local_compose != 0) /* internal error */
-         png_error(png_ptr, "png_image_read: alpha channel lost");
-
-      if ((format & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0) {
-         info_format |= PNG_FORMAT_FLAG_ASSOCIATED_ALPHA;
-      }
-
-      if (info_ptr->bit_depth == 16)
-         info_format |= PNG_FORMAT_FLAG_LINEAR;
-
-#ifdef PNG_FORMAT_BGR_SUPPORTED
-      if ((png_ptr->transformations & PNG_BGR) != 0)
-         info_format |= PNG_FORMAT_FLAG_BGR;
-#endif
-
-#ifdef PNG_FORMAT_AFIRST_SUPPORTED
-         if (do_local_background == 2)
-         {
-            if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
-               info_format |= PNG_FORMAT_FLAG_AFIRST;
-         }
-
-         if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0 ||
-            ((png_ptr->transformations & PNG_ADD_ALPHA) != 0 &&
-            (png_ptr->flags & PNG_FLAG_FILLER_AFTER) == 0))
-         {
-            if (do_local_background == 2)
-               png_error(png_ptr, "unexpected alpha swap transformation");
-
-            info_format |= PNG_FORMAT_FLAG_AFIRST;
-         }
-#     endif
-
-      /* This is actually an internal error. */
-      if (info_format != format)
-         png_error(png_ptr, "png_read_image: invalid transformations");
-   }
-
-   /* Now read the rows.  If do_local_compose is set then it is necessary to use
-    * a local row buffer.  The output will be GA, RGBA or BGRA and must be
-    * converted to G, RGB or BGR as appropriate.  The 'local_row' member of the
-    * display acts as a flag.
-    */
-   {
-      png_voidp first_row = display->buffer;
-      ptrdiff_t row_bytes = display->row_stride;
-
-      if (linear != 0)
-         row_bytes *= 2;
-
-      /* The following expression is designed to work correctly whether it gives
-       * a signed or an unsigned result.
-       */
-      if (row_bytes < 0)
-      {
-         char *ptr = png_voidcast(char*, first_row);
-         ptr += (image->height-1) * (-row_bytes);
-         first_row = png_voidcast(png_voidp, ptr);
-      }
-
-      display->first_row = first_row;
-      display->row_bytes = row_bytes;
-   }
-
-   if (do_local_compose != 0)
-   {
-      int result;
-      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
-
-      display->local_row = row;
-      result = png_safe_execute(image, png_image_read_composite, display);
-      display->local_row = NULL;
-      png_free(png_ptr, row);
-
-      return result;
-   }
-
-   else if (do_local_background == 2)
-   {
-      int result;
-      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
-
-      display->local_row = row;
-      result = png_safe_execute(image, png_image_read_background, display);
-      display->local_row = NULL;
-      png_free(png_ptr, row);
-
-      return result;
-   }
-
-   else
-   {
-      png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes;
-
-      while (--passes >= 0)
-      {
-         png_uint_32      y = image->height;
-         png_bytep        row = png_voidcast(png_bytep, display->first_row);
-
-         for (; y > 0; --y)
-         {
-            png_read_row(png_ptr, row, NULL);
-            row += row_bytes;
-         }
-      }
-
-      return 1;
-   }
-}
-
-int PNGAPI
-png_image_finish_read(png_imagep image, png_const_colorp background,
-    void *buffer, png_int_32 row_stride, void *colormap)
-{
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      /* Check for row_stride overflow.  This check is not performed on the
-       * original PNG format because it may not occur in the output PNG format
-       * and libpng deals with the issues of reading the original.
-       */
-      unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format);
-
-      /* The following checks just the 'row_stride' calculation to ensure it
-       * fits in a signed 32-bit value.  Because channels/components can be
-       * either 1 or 2 bytes in size the length of a row can still overflow 32
-       * bits; this is just to verify that the 'row_stride' argument can be
-       * represented.
-       */
-      if (image->width <= 0x7fffffffU/channels) /* no overflow */
-      {
-         png_uint_32 check;
-         png_uint_32 png_row_stride = image->width * channels;
-
-         if (row_stride == 0)
-            row_stride = (png_int_32)/*SAFE*/png_row_stride;
-
-         if (row_stride < 0)
-            check = (png_uint_32)(-row_stride);
-
-         else
-            check = (png_uint_32)row_stride;
-
-         /* This verifies 'check', the absolute value of the actual stride
-          * passed in and detects overflow in the application calculation (i.e.
-          * if the app did actually pass in a non-zero 'row_stride'.
-          */
-         if (image->opaque != NULL && buffer != NULL && check >= png_row_stride)
-         {
-            /* Now check for overflow of the image buffer calculation; this
-             * limits the whole image size to 32 bits for API compatibility with
-             * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro.
-             *
-             * The PNG_IMAGE_BUFFER_SIZE macro is:
-             *
-             *    (PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)*height*(row_stride))
-             *
-             * And the component size is always 1 or 2, so make sure that the
-             * number of *bytes* that the application is saying are available
-             * does actually fit into a 32-bit number.
-             *
-             * NOTE: this will be changed in 1.7 because PNG_IMAGE_BUFFER_SIZE
-             * will be changed to use png_alloc_size_t; bigger images can be
-             * accommodated on 64-bit systems.
-             */
-            if (image->height <=
-                0xffffffffU/PNG_IMAGE_PIXEL_COMPONENT_SIZE(image->format)/check)
-            {
-               if ((image->format & PNG_FORMAT_FLAG_COLORMAP) == 0 ||
-                  (image->colormap_entries > 0 && colormap != NULL))
-               {
-                  int result;
-                  png_image_read_control display;
-
-                  memset(&display, 0, (sizeof display));
-                  display.image = image;
-                  display.buffer = buffer;
-                  display.row_stride = row_stride;
-                  display.colormap = colormap;
-                  display.background = background;
-                  display.local_row = NULL;
-
-                  /* Choose the correct 'end' routine; for the color-map case
-                   * all the setup has already been done.
-                   */
-                  if ((image->format & PNG_FORMAT_FLAG_COLORMAP) != 0)
-                     result =
-                         png_safe_execute(image,
-                             png_image_read_colormap, &display) &&
-                             png_safe_execute(image,
-                             png_image_read_colormapped, &display);
-
-                  else
-                     result =
-                        png_safe_execute(image,
-                            png_image_read_direct, &display);
-
-                  png_image_free(image);
-                  return result;
-               }
-
-               else
-                  return png_image_error(image,
-                      "png_image_finish_read[color-map]: no color-map");
-            }
-
-            else
-               return png_image_error(image,
-                   "png_image_finish_read: image too large");
-         }
-
-         else
-            return png_image_error(image,
-                "png_image_finish_read: invalid argument");
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_finish_read: row_stride too large");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_finish_read: damaged PNG_IMAGE_VERSION");
-
-   return 0;
-}
-
-#endif /* SIMPLIFIED_READ */
-#endif /* READ */
diff --git a/dep/libpng/src/pngrio.c b/dep/libpng/src/pngrio.c
deleted file mode 100644
index 794635810..000000000
--- a/dep/libpng/src/pngrio.c
+++ /dev/null
@@ -1,120 +0,0 @@
-
-/* pngrio.c - functions for data input
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file provides a location for all input.  Users who need
- * special handling are expected to write a function that has the same
- * arguments as this and performs a similar function, but that possibly
- * has a different input method.  Note that you shouldn't change this
- * function, but rather write a replacement function and then make
- * libpng use it at run time with png_set_read_fn(...).
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-/* Read the data from whatever input you are using.  The default routine
- * reads from a file pointer.  Note that this routine sometimes gets called
- * with very small lengths, so you should implement some kind of simple
- * buffering if you are using unbuffered reads.  This should never be asked
- * to read more than 64K on a 16-bit machine.
- */
-void /* PRIVATE */
-png_read_data(png_structrp png_ptr, png_bytep data, size_t length)
-{
-   png_debug1(4, "reading %d bytes", (int)length);
-
-   if (png_ptr->read_data_fn != NULL)
-      (*(png_ptr->read_data_fn))(png_ptr, data, length);
-
-   else
-      png_error(png_ptr, "Call to NULL read function");
-}
-
-#ifdef PNG_STDIO_SUPPORTED
-/* This is the function that does the actual reading of data.  If you are
- * not reading from a standard C stream, you should create a replacement
- * read_data function and use it at run time with png_set_read_fn(), rather
- * than changing the library.
- */
-void PNGCBAPI
-png_default_read_data(png_structp png_ptr, png_bytep data, size_t length)
-{
-   size_t check;
-
-   if (png_ptr == NULL)
-      return;
-
-   /* fread() returns 0 on error, so it is OK to store this in a size_t
-    * instead of an int, which is what fread() actually returns.
-    */
-   check = fread(data, 1, length, png_voidcast(png_FILE_p, png_ptr->io_ptr));
-
-   if (check != length)
-      png_error(png_ptr, "Read Error");
-}
-#endif
-
-/* This function allows the application to supply a new input function
- * for libpng if standard C streams aren't being used.
- *
- * This function takes as its arguments:
- *
- * png_ptr      - pointer to a png input data structure
- *
- * io_ptr       - pointer to user supplied structure containing info about
- *                the input functions.  May be NULL.
- *
- * read_data_fn - pointer to a new input function that takes as its
- *                arguments a pointer to a png_struct, a pointer to
- *                a location where input data can be stored, and a 32-bit
- *                unsigned int that is the number of bytes to be read.
- *                To exit and output any fatal error messages the new write
- *                function should call png_error(png_ptr, "Error msg").
- *                May be NULL, in which case libpng's default function will
- *                be used.
- */
-void PNGAPI
-png_set_read_fn(png_structrp png_ptr, png_voidp io_ptr,
-    png_rw_ptr read_data_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->io_ptr = io_ptr;
-
-#ifdef PNG_STDIO_SUPPORTED
-   if (read_data_fn != NULL)
-      png_ptr->read_data_fn = read_data_fn;
-
-   else
-      png_ptr->read_data_fn = png_default_read_data;
-#else
-   png_ptr->read_data_fn = read_data_fn;
-#endif
-
-#ifdef PNG_WRITE_SUPPORTED
-   /* It is an error to write to a read device */
-   if (png_ptr->write_data_fn != NULL)
-   {
-      png_ptr->write_data_fn = NULL;
-      png_warning(png_ptr,
-          "Can't set both read_data_fn and write_data_fn in the"
-          " same structure");
-   }
-#endif
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   png_ptr->output_flush_fn = NULL;
-#endif
-}
-#endif /* READ */
diff --git a/dep/libpng/src/pngrtran.c b/dep/libpng/src/pngrtran.c
deleted file mode 100644
index 1526123e0..000000000
--- a/dep/libpng/src/pngrtran.c
+++ /dev/null
@@ -1,5040 +0,0 @@
-
-/* pngrtran.c - transforms the data in a row for PNG readers
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file contains functions optionally called by an application
- * in order to tell libpng how to handle data when reading a PNG.
- * Transformations that are used in both reading and writing are
- * in pngtrans.c.
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_ARM_NEON_IMPLEMENTATION
-#  if PNG_ARM_NEON_IMPLEMENTATION == 1
-#    define PNG_ARM_NEON_INTRINSICS_AVAILABLE
-#    if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
-#      include <arm64_neon.h>
-#    else
-#      include <arm_neon.h>
-#    endif
-#  endif
-#endif
-
-#ifdef PNG_READ_SUPPORTED
-
-/* Set the action on getting a CRC error for an ancillary or critical chunk. */
-void PNGAPI
-png_set_crc_action(png_structrp png_ptr, int crit_action, int ancil_action)
-{
-   png_debug(1, "in png_set_crc_action");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Tell libpng how we react to CRC errors in critical chunks */
-   switch (crit_action)
-   {
-      case PNG_CRC_NO_CHANGE:                        /* Leave setting as is */
-         break;
-
-      case PNG_CRC_WARN_USE:                               /* Warn/use data */
-         png_ptr->flags &= ~PNG_FLAG_CRC_CRITICAL_MASK;
-         png_ptr->flags |= PNG_FLAG_CRC_CRITICAL_USE;
-         break;
-
-      case PNG_CRC_QUIET_USE:                             /* Quiet/use data */
-         png_ptr->flags &= ~PNG_FLAG_CRC_CRITICAL_MASK;
-         png_ptr->flags |= PNG_FLAG_CRC_CRITICAL_USE |
-                           PNG_FLAG_CRC_CRITICAL_IGNORE;
-         break;
-
-      case PNG_CRC_WARN_DISCARD:    /* Not a valid action for critical data */
-         png_warning(png_ptr,
-             "Can't discard critical data on CRC error");
-         /* FALLTHROUGH */
-      case PNG_CRC_ERROR_QUIT:                                /* Error/quit */
-
-      case PNG_CRC_DEFAULT:
-      default:
-         png_ptr->flags &= ~PNG_FLAG_CRC_CRITICAL_MASK;
-         break;
-   }
-
-   /* Tell libpng how we react to CRC errors in ancillary chunks */
-   switch (ancil_action)
-   {
-      case PNG_CRC_NO_CHANGE:                       /* Leave setting as is */
-         break;
-
-      case PNG_CRC_WARN_USE:                              /* Warn/use data */
-         png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
-         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_USE;
-         break;
-
-      case PNG_CRC_QUIET_USE:                            /* Quiet/use data */
-         png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
-         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_USE |
-                           PNG_FLAG_CRC_ANCILLARY_NOWARN;
-         break;
-
-      case PNG_CRC_ERROR_QUIT:                               /* Error/quit */
-         png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
-         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_NOWARN;
-         break;
-
-      case PNG_CRC_WARN_DISCARD:                      /* Warn/discard data */
-
-      case PNG_CRC_DEFAULT:
-      default:
-         png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
-         break;
-   }
-}
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-/* Is it OK to set a transformation now?  Only if png_start_read_image or
- * png_read_update_info have not been called.  It is not necessary for the IHDR
- * to have been read in all cases; the need_IHDR parameter allows for this
- * check too.
- */
-static int
-png_rtran_ok(png_structrp png_ptr, int need_IHDR)
-{
-   if (png_ptr != NULL)
-   {
-      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) != 0)
-         png_app_error(png_ptr,
-             "invalid after png_start_read_image or png_read_update_info");
-
-      else if (need_IHDR && (png_ptr->mode & PNG_HAVE_IHDR) == 0)
-         png_app_error(png_ptr, "invalid before the PNG header has been read");
-
-      else
-      {
-         /* Turn on failure to initialize correctly for all transforms. */
-         png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED;
-
-         return 1; /* Ok */
-      }
-   }
-
-   return 0; /* no png_error possible! */
-}
-#endif
-
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-/* Handle alpha and tRNS via a background color */
-void PNGFAPI
-png_set_background_fixed(png_structrp png_ptr,
-    png_const_color_16p background_color, int background_gamma_code,
-    int need_expand, png_fixed_point background_gamma)
-{
-   png_debug(1, "in png_set_background_fixed");
-
-   if (png_rtran_ok(png_ptr, 0) == 0 || background_color == NULL)
-      return;
-
-   if (background_gamma_code == PNG_BACKGROUND_GAMMA_UNKNOWN)
-   {
-      png_warning(png_ptr, "Application must supply a known background gamma");
-      return;
-   }
-
-   png_ptr->transformations |= PNG_COMPOSE | PNG_STRIP_ALPHA;
-   png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-   png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-
-   png_ptr->background = *background_color;
-   png_ptr->background_gamma = background_gamma;
-   png_ptr->background_gamma_type = (png_byte)(background_gamma_code);
-   if (need_expand != 0)
-      png_ptr->transformations |= PNG_BACKGROUND_EXPAND;
-   else
-      png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND;
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_background(png_structrp png_ptr,
-    png_const_color_16p background_color, int background_gamma_code,
-    int need_expand, double background_gamma)
-{
-   png_set_background_fixed(png_ptr, background_color, background_gamma_code,
-      need_expand, png_fixed(png_ptr, background_gamma, "png_set_background"));
-}
-#  endif /* FLOATING_POINT */
-#endif /* READ_BACKGROUND */
-
-/* Scale 16-bit depth files to 8-bit depth.  If both of these are set then the
- * one that pngrtran does first (scale) happens.  This is necessary to allow the
- * TRANSFORM and API behavior to be somewhat consistent, and it's simpler.
- */
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-void PNGAPI
-png_set_scale_16(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_scale_16");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= PNG_SCALE_16_TO_8;
-}
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-/* Chop 16-bit depth files to 8-bit depth */
-void PNGAPI
-png_set_strip_16(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_strip_16");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= PNG_16_TO_8;
-}
-#endif
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-void PNGAPI
-png_set_strip_alpha(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_strip_alpha");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= PNG_STRIP_ALPHA;
-}
-#endif
-
-#if defined(PNG_READ_ALPHA_MODE_SUPPORTED) || defined(PNG_READ_GAMMA_SUPPORTED)
-static png_fixed_point
-translate_gamma_flags(png_structrp png_ptr, png_fixed_point output_gamma,
-    int is_screen)
-{
-   /* Check for flag values.  The main reason for having the old Mac value as a
-    * flag is that it is pretty near impossible to work out what the correct
-    * value is from Apple documentation - a working Mac system is needed to
-    * discover the value!
-    */
-   if (output_gamma == PNG_DEFAULT_sRGB ||
-      output_gamma == PNG_FP_1 / PNG_DEFAULT_sRGB)
-   {
-      /* If there is no sRGB support this just sets the gamma to the standard
-       * sRGB value.  (This is a side effect of using this function!)
-       */
-#     ifdef PNG_READ_sRGB_SUPPORTED
-         png_ptr->flags |= PNG_FLAG_ASSUME_sRGB;
-#     else
-         PNG_UNUSED(png_ptr)
-#     endif
-      if (is_screen != 0)
-         output_gamma = PNG_GAMMA_sRGB;
-      else
-         output_gamma = PNG_GAMMA_sRGB_INVERSE;
-   }
-
-   else if (output_gamma == PNG_GAMMA_MAC_18 ||
-      output_gamma == PNG_FP_1 / PNG_GAMMA_MAC_18)
-   {
-      if (is_screen != 0)
-         output_gamma = PNG_GAMMA_MAC_OLD;
-      else
-         output_gamma = PNG_GAMMA_MAC_INVERSE;
-   }
-
-   return output_gamma;
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-static png_fixed_point
-convert_gamma_value(png_structrp png_ptr, double output_gamma)
-{
-   /* The following silently ignores cases where fixed point (times 100,000)
-    * gamma values are passed to the floating point API.  This is safe and it
-    * means the fixed point constants work just fine with the floating point
-    * API.  The alternative would just lead to undetected errors and spurious
-    * bug reports.  Negative values fail inside the _fixed API unless they
-    * correspond to the flag values.
-    */
-   if (output_gamma > 0 && output_gamma < 128)
-      output_gamma *= PNG_FP_1;
-
-   /* This preserves -1 and -2 exactly: */
-   output_gamma = floor(output_gamma + .5);
-
-   if (output_gamma > PNG_FP_MAX || output_gamma < PNG_FP_MIN)
-      png_fixed_error(png_ptr, "gamma value");
-
-   return (png_fixed_point)output_gamma;
-}
-#  endif
-#endif /* READ_ALPHA_MODE || READ_GAMMA */
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-void PNGFAPI
-png_set_alpha_mode_fixed(png_structrp png_ptr, int mode,
-    png_fixed_point output_gamma)
-{
-   int compose = 0;
-   png_fixed_point file_gamma;
-
-   png_debug(1, "in png_set_alpha_mode_fixed");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   output_gamma = translate_gamma_flags(png_ptr, output_gamma, 1/*screen*/);
-
-   /* Validate the value to ensure it is in a reasonable range.  The value
-    * is expected to be 1 or greater, but this range test allows for some
-    * viewing correction values.  The intent is to weed out the API users
-    * who might use the inverse of the gamma value accidentally!
-    *
-    * In libpng 1.6.0, we changed from 0.07..3 to 0.01..100, to accommodate
-    * the optimal 16-bit gamma of 36 and its reciprocal.
-    */
-   if (output_gamma < 1000 || output_gamma > 10000000)
-      png_error(png_ptr, "output gamma out of expected range");
-
-   /* The default file gamma is the inverse of the output gamma; the output
-    * gamma may be changed below so get the file value first:
-    */
-   file_gamma = png_reciprocal(output_gamma);
-
-   /* There are really 8 possibilities here, composed of any combination
-    * of:
-    *
-    *    premultiply the color channels
-    *    do not encode non-opaque pixels
-    *    encode the alpha as well as the color channels
-    *
-    * The differences disappear if the input/output ('screen') gamma is 1.0,
-    * because then the encoding is a no-op and there is only the choice of
-    * premultiplying the color channels or not.
-    *
-    * png_set_alpha_mode and png_set_background interact because both use
-    * png_compose to do the work.  Calling both is only useful when
-    * png_set_alpha_mode is used to set the default mode - PNG_ALPHA_PNG - along
-    * with a default gamma value.  Otherwise PNG_COMPOSE must not be set.
-    */
-   switch (mode)
-   {
-      case PNG_ALPHA_PNG:        /* default: png standard */
-         /* No compose, but it may be set by png_set_background! */
-         png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-         png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-         break;
-
-      case PNG_ALPHA_ASSOCIATED: /* color channels premultiplied */
-         compose = 1;
-         png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-         png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-         /* The output is linear: */
-         output_gamma = PNG_FP_1;
-         break;
-
-      case PNG_ALPHA_OPTIMIZED:  /* associated, non-opaque pixels linear */
-         compose = 1;
-         png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-         png_ptr->flags |= PNG_FLAG_OPTIMIZE_ALPHA;
-         /* output_gamma records the encoding of opaque pixels! */
-         break;
-
-      case PNG_ALPHA_BROKEN:     /* associated, non-linear, alpha encoded */
-         compose = 1;
-         png_ptr->transformations |= PNG_ENCODE_ALPHA;
-         png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-         break;
-
-      default:
-         png_error(png_ptr, "invalid alpha mode");
-   }
-
-   /* Only set the default gamma if the file gamma has not been set (this has
-    * the side effect that the gamma in a second call to png_set_alpha_mode will
-    * be ignored.)
-    */
-   if (png_ptr->colorspace.gamma == 0)
-   {
-      png_ptr->colorspace.gamma = file_gamma;
-      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
-   }
-
-   /* But always set the output gamma: */
-   png_ptr->screen_gamma = output_gamma;
-
-   /* Finally, if pre-multiplying, set the background fields to achieve the
-    * desired result.
-    */
-   if (compose != 0)
-   {
-      /* And obtain alpha pre-multiplication by composing on black: */
-      memset(&png_ptr->background, 0, (sizeof png_ptr->background));
-      png_ptr->background_gamma = png_ptr->colorspace.gamma; /* just in case */
-      png_ptr->background_gamma_type = PNG_BACKGROUND_GAMMA_FILE;
-      png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND;
-
-      if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-         png_error(png_ptr,
-             "conflicting calls to set alpha mode and background");
-
-      png_ptr->transformations |= PNG_COMPOSE;
-   }
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_alpha_mode(png_structrp png_ptr, int mode, double output_gamma)
-{
-   png_set_alpha_mode_fixed(png_ptr, mode, convert_gamma_value(png_ptr,
-       output_gamma));
-}
-#  endif
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-/* Dither file to 8-bit.  Supply a palette, the current number
- * of elements in the palette, the maximum number of elements
- * allowed, and a histogram if possible.  If the current number
- * of colors is greater than the maximum number, the palette will be
- * modified to fit in the maximum number.  "full_quantize" indicates
- * whether we need a quantizing cube set up for RGB images, or if we
- * simply are reducing the number of colors in a paletted image.
- */
-
-typedef struct png_dsort_struct
-{
-   struct png_dsort_struct * next;
-   png_byte left;
-   png_byte right;
-} png_dsort;
-typedef png_dsort *   png_dsortp;
-typedef png_dsort * * png_dsortpp;
-
-void PNGAPI
-png_set_quantize(png_structrp png_ptr, png_colorp palette,
-    int num_palette, int maximum_colors, png_const_uint_16p histogram,
-    int full_quantize)
-{
-   png_debug(1, "in png_set_quantize");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= PNG_QUANTIZE;
-
-   if (full_quantize == 0)
-   {
-      int i;
-
-      png_ptr->quantize_index = (png_bytep)png_malloc(png_ptr,
-          (png_alloc_size_t)num_palette);
-      for (i = 0; i < num_palette; i++)
-         png_ptr->quantize_index[i] = (png_byte)i;
-   }
-
-   if (num_palette > maximum_colors)
-   {
-      if (histogram != NULL)
-      {
-         /* This is easy enough, just throw out the least used colors.
-          * Perhaps not the best solution, but good enough.
-          */
-
-         int i;
-
-         /* Initialize an array to sort colors */
-         png_ptr->quantize_sort = (png_bytep)png_malloc(png_ptr,
-             (png_alloc_size_t)num_palette);
-
-         /* Initialize the quantize_sort array */
-         for (i = 0; i < num_palette; i++)
-            png_ptr->quantize_sort[i] = (png_byte)i;
-
-         /* Find the least used palette entries by starting a
-          * bubble sort, and running it until we have sorted
-          * out enough colors.  Note that we don't care about
-          * sorting all the colors, just finding which are
-          * least used.
-          */
-
-         for (i = num_palette - 1; i >= maximum_colors; i--)
-         {
-            int done; /* To stop early if the list is pre-sorted */
-            int j;
-
-            done = 1;
-            for (j = 0; j < i; j++)
-            {
-               if (histogram[png_ptr->quantize_sort[j]]
-                   < histogram[png_ptr->quantize_sort[j + 1]])
-               {
-                  png_byte t;
-
-                  t = png_ptr->quantize_sort[j];
-                  png_ptr->quantize_sort[j] = png_ptr->quantize_sort[j + 1];
-                  png_ptr->quantize_sort[j + 1] = t;
-                  done = 0;
-               }
-            }
-
-            if (done != 0)
-               break;
-         }
-
-         /* Swap the palette around, and set up a table, if necessary */
-         if (full_quantize != 0)
-         {
-            int j = num_palette;
-
-            /* Put all the useful colors within the max, but don't
-             * move the others.
-             */
-            for (i = 0; i < maximum_colors; i++)
-            {
-               if ((int)png_ptr->quantize_sort[i] >= maximum_colors)
-               {
-                  do
-                     j--;
-                  while ((int)png_ptr->quantize_sort[j] >= maximum_colors);
-
-                  palette[i] = palette[j];
-               }
-            }
-         }
-         else
-         {
-            int j = num_palette;
-
-            /* Move all the used colors inside the max limit, and
-             * develop a translation table.
-             */
-            for (i = 0; i < maximum_colors; i++)
-            {
-               /* Only move the colors we need to */
-               if ((int)png_ptr->quantize_sort[i] >= maximum_colors)
-               {
-                  png_color tmp_color;
-
-                  do
-                     j--;
-                  while ((int)png_ptr->quantize_sort[j] >= maximum_colors);
-
-                  tmp_color = palette[j];
-                  palette[j] = palette[i];
-                  palette[i] = tmp_color;
-                  /* Indicate where the color went */
-                  png_ptr->quantize_index[j] = (png_byte)i;
-                  png_ptr->quantize_index[i] = (png_byte)j;
-               }
-            }
-
-            /* Find closest color for those colors we are not using */
-            for (i = 0; i < num_palette; i++)
-            {
-               if ((int)png_ptr->quantize_index[i] >= maximum_colors)
-               {
-                  int min_d, k, min_k, d_index;
-
-                  /* Find the closest color to one we threw out */
-                  d_index = png_ptr->quantize_index[i];
-                  min_d = PNG_COLOR_DIST(palette[d_index], palette[0]);
-                  for (k = 1, min_k = 0; k < maximum_colors; k++)
-                  {
-                     int d;
-
-                     d = PNG_COLOR_DIST(palette[d_index], palette[k]);
-
-                     if (d < min_d)
-                     {
-                        min_d = d;
-                        min_k = k;
-                     }
-                  }
-                  /* Point to closest color */
-                  png_ptr->quantize_index[i] = (png_byte)min_k;
-               }
-            }
-         }
-         png_free(png_ptr, png_ptr->quantize_sort);
-         png_ptr->quantize_sort = NULL;
-      }
-      else
-      {
-         /* This is much harder to do simply (and quickly).  Perhaps
-          * we need to go through a median cut routine, but those
-          * don't always behave themselves with only a few colors
-          * as input.  So we will just find the closest two colors,
-          * and throw out one of them (chosen somewhat randomly).
-          * [We don't understand this at all, so if someone wants to
-          *  work on improving it, be our guest - AED, GRP]
-          */
-         int i;
-         int max_d;
-         int num_new_palette;
-         png_dsortp t;
-         png_dsortpp hash;
-
-         t = NULL;
-
-         /* Initialize palette index arrays */
-         png_ptr->index_to_palette = (png_bytep)png_malloc(png_ptr,
-             (png_alloc_size_t)num_palette);
-         png_ptr->palette_to_index = (png_bytep)png_malloc(png_ptr,
-             (png_alloc_size_t)num_palette);
-
-         /* Initialize the sort array */
-         for (i = 0; i < num_palette; i++)
-         {
-            png_ptr->index_to_palette[i] = (png_byte)i;
-            png_ptr->palette_to_index[i] = (png_byte)i;
-         }
-
-         hash = (png_dsortpp)png_calloc(png_ptr, (png_alloc_size_t)(769 *
-             (sizeof (png_dsortp))));
-
-         num_new_palette = num_palette;
-
-         /* Initial wild guess at how far apart the farthest pixel
-          * pair we will be eliminating will be.  Larger
-          * numbers mean more areas will be allocated, Smaller
-          * numbers run the risk of not saving enough data, and
-          * having to do this all over again.
-          *
-          * I have not done extensive checking on this number.
-          */
-         max_d = 96;
-
-         while (num_new_palette > maximum_colors)
-         {
-            for (i = 0; i < num_new_palette - 1; i++)
-            {
-               int j;
-
-               for (j = i + 1; j < num_new_palette; j++)
-               {
-                  int d;
-
-                  d = PNG_COLOR_DIST(palette[i], palette[j]);
-
-                  if (d <= max_d)
-                  {
-
-                     t = (png_dsortp)png_malloc_warn(png_ptr,
-                         (png_alloc_size_t)(sizeof (png_dsort)));
-
-                     if (t == NULL)
-                         break;
-
-                     t->next = hash[d];
-                     t->left = (png_byte)i;
-                     t->right = (png_byte)j;
-                     hash[d] = t;
-                  }
-               }
-               if (t == NULL)
-                  break;
-            }
-
-            if (t != NULL)
-            for (i = 0; i <= max_d; i++)
-            {
-               if (hash[i] != NULL)
-               {
-                  png_dsortp p;
-
-                  for (p = hash[i]; p; p = p->next)
-                  {
-                     if ((int)png_ptr->index_to_palette[p->left]
-                         < num_new_palette &&
-                         (int)png_ptr->index_to_palette[p->right]
-                         < num_new_palette)
-                     {
-                        int j, next_j;
-
-                        if (num_new_palette & 0x01)
-                        {
-                           j = p->left;
-                           next_j = p->right;
-                        }
-                        else
-                        {
-                           j = p->right;
-                           next_j = p->left;
-                        }
-
-                        num_new_palette--;
-                        palette[png_ptr->index_to_palette[j]]
-                            = palette[num_new_palette];
-                        if (full_quantize == 0)
-                        {
-                           int k;
-
-                           for (k = 0; k < num_palette; k++)
-                           {
-                              if (png_ptr->quantize_index[k] ==
-                                  png_ptr->index_to_palette[j])
-                                 png_ptr->quantize_index[k] =
-                                     png_ptr->index_to_palette[next_j];
-
-                              if ((int)png_ptr->quantize_index[k] ==
-                                  num_new_palette)
-                                 png_ptr->quantize_index[k] =
-                                     png_ptr->index_to_palette[j];
-                           }
-                        }
-
-                        png_ptr->index_to_palette[png_ptr->palette_to_index
-                            [num_new_palette]] = png_ptr->index_to_palette[j];
-
-                        png_ptr->palette_to_index[png_ptr->index_to_palette[j]]
-                            = png_ptr->palette_to_index[num_new_palette];
-
-                        png_ptr->index_to_palette[j] =
-                            (png_byte)num_new_palette;
-
-                        png_ptr->palette_to_index[num_new_palette] =
-                            (png_byte)j;
-                     }
-                     if (num_new_palette <= maximum_colors)
-                        break;
-                  }
-                  if (num_new_palette <= maximum_colors)
-                     break;
-               }
-            }
-
-            for (i = 0; i < 769; i++)
-            {
-               if (hash[i] != NULL)
-               {
-                  png_dsortp p = hash[i];
-                  while (p)
-                  {
-                     t = p->next;
-                     png_free(png_ptr, p);
-                     p = t;
-                  }
-               }
-               hash[i] = 0;
-            }
-            max_d += 96;
-         }
-         png_free(png_ptr, hash);
-         png_free(png_ptr, png_ptr->palette_to_index);
-         png_free(png_ptr, png_ptr->index_to_palette);
-         png_ptr->palette_to_index = NULL;
-         png_ptr->index_to_palette = NULL;
-      }
-      num_palette = maximum_colors;
-   }
-   if (png_ptr->palette == NULL)
-   {
-      png_ptr->palette = palette;
-   }
-   png_ptr->num_palette = (png_uint_16)num_palette;
-
-   if (full_quantize != 0)
-   {
-      int i;
-      png_bytep distance;
-      int total_bits = PNG_QUANTIZE_RED_BITS + PNG_QUANTIZE_GREEN_BITS +
-          PNG_QUANTIZE_BLUE_BITS;
-      int num_red = (1 << PNG_QUANTIZE_RED_BITS);
-      int num_green = (1 << PNG_QUANTIZE_GREEN_BITS);
-      int num_blue = (1 << PNG_QUANTIZE_BLUE_BITS);
-      size_t num_entries = ((size_t)1 << total_bits);
-
-      png_ptr->palette_lookup = (png_bytep)png_calloc(png_ptr,
-          (png_alloc_size_t)(num_entries));
-
-      distance = (png_bytep)png_malloc(png_ptr, (png_alloc_size_t)num_entries);
-
-      memset(distance, 0xff, num_entries);
-
-      for (i = 0; i < num_palette; i++)
-      {
-         int ir, ig, ib;
-         int r = (palette[i].red >> (8 - PNG_QUANTIZE_RED_BITS));
-         int g = (palette[i].green >> (8 - PNG_QUANTIZE_GREEN_BITS));
-         int b = (palette[i].blue >> (8 - PNG_QUANTIZE_BLUE_BITS));
-
-         for (ir = 0; ir < num_red; ir++)
-         {
-            /* int dr = abs(ir - r); */
-            int dr = ((ir > r) ? ir - r : r - ir);
-            int index_r = (ir << (PNG_QUANTIZE_BLUE_BITS +
-                PNG_QUANTIZE_GREEN_BITS));
-
-            for (ig = 0; ig < num_green; ig++)
-            {
-               /* int dg = abs(ig - g); */
-               int dg = ((ig > g) ? ig - g : g - ig);
-               int dt = dr + dg;
-               int dm = ((dr > dg) ? dr : dg);
-               int index_g = index_r | (ig << PNG_QUANTIZE_BLUE_BITS);
-
-               for (ib = 0; ib < num_blue; ib++)
-               {
-                  int d_index = index_g | ib;
-                  /* int db = abs(ib - b); */
-                  int db = ((ib > b) ? ib - b : b - ib);
-                  int dmax = ((dm > db) ? dm : db);
-                  int d = dmax + dt + db;
-
-                  if (d < (int)distance[d_index])
-                  {
-                     distance[d_index] = (png_byte)d;
-                     png_ptr->palette_lookup[d_index] = (png_byte)i;
-                  }
-               }
-            }
-         }
-      }
-
-      png_free(png_ptr, distance);
-   }
-}
-#endif /* READ_QUANTIZE */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-void PNGFAPI
-png_set_gamma_fixed(png_structrp png_ptr, png_fixed_point scrn_gamma,
-    png_fixed_point file_gamma)
-{
-   png_debug(1, "in png_set_gamma_fixed");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   /* New in libpng-1.5.4 - reserve particular negative values as flags. */
-   scrn_gamma = translate_gamma_flags(png_ptr, scrn_gamma, 1/*screen*/);
-   file_gamma = translate_gamma_flags(png_ptr, file_gamma, 0/*file*/);
-
-   /* Checking the gamma values for being >0 was added in 1.5.4 along with the
-    * premultiplied alpha support; this actually hides an undocumented feature
-    * of the previous implementation which allowed gamma processing to be
-    * disabled in background handling.  There is no evidence (so far) that this
-    * was being used; however, png_set_background itself accepted and must still
-    * accept '0' for the gamma value it takes, because it isn't always used.
-    *
-    * Since this is an API change (albeit a very minor one that removes an
-    * undocumented API feature) the following checks were only enabled in
-    * libpng-1.6.0.
-    */
-   if (file_gamma <= 0)
-      png_error(png_ptr, "invalid file gamma in png_set_gamma");
-
-   if (scrn_gamma <= 0)
-      png_error(png_ptr, "invalid screen gamma in png_set_gamma");
-
-   /* Set the gamma values unconditionally - this overrides the value in the PNG
-    * file if a gAMA chunk was present.  png_set_alpha_mode provides a
-    * different, easier, way to default the file gamma.
-    */
-   png_ptr->colorspace.gamma = file_gamma;
-   png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
-   png_ptr->screen_gamma = scrn_gamma;
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_gamma(png_structrp png_ptr, double scrn_gamma, double file_gamma)
-{
-   png_set_gamma_fixed(png_ptr, convert_gamma_value(png_ptr, scrn_gamma),
-       convert_gamma_value(png_ptr, file_gamma));
-}
-#  endif /* FLOATING_POINT */
-#endif /* READ_GAMMA */
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-/* Expand paletted images to RGB, expand grayscale images of
- * less than 8-bit depth to 8-bit depth, and expand tRNS chunks
- * to alpha channels.
- */
-void PNGAPI
-png_set_expand(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_expand");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-}
-
-/* GRR 19990627:  the following three functions currently are identical
- *  to png_set_expand().  However, it is entirely reasonable that someone
- *  might wish to expand an indexed image to RGB but *not* expand a single,
- *  fully transparent palette entry to a full alpha channel--perhaps instead
- *  convert tRNS to the grayscale/RGB format (16-bit RGB value), or replace
- *  the transparent color with a particular RGB value, or drop tRNS entirely.
- *  IOW, a future version of the library may make the transformations flag
- *  a bit more fine-grained, with separate bits for each of these three
- *  functions.
- *
- *  More to the point, these functions make it obvious what libpng will be
- *  doing, whereas "expand" can (and does) mean any number of things.
- *
- *  GRP 20060307: In libpng-1.2.9, png_set_gray_1_2_4_to_8() was modified
- *  to expand only the sample depth but not to expand the tRNS to alpha
- *  and its name was changed to png_set_expand_gray_1_2_4_to_8().
- */
-
-/* Expand paletted images to RGB. */
-void PNGAPI
-png_set_palette_to_rgb(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_palette_to_rgb");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-}
-
-/* Expand grayscale images of less than 8-bit depth to 8 bits. */
-void PNGAPI
-png_set_expand_gray_1_2_4_to_8(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_expand_gray_1_2_4_to_8");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= PNG_EXPAND;
-}
-
-/* Expand tRNS chunks to alpha channels. */
-void PNGAPI
-png_set_tRNS_to_alpha(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_tRNS_to_alpha");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-}
-#endif /* READ_EXPAND */
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-/* Expand to 16-bit channels, expand the tRNS chunk too (because otherwise
- * it may not work correctly.)
- */
-void PNGAPI
-png_set_expand_16(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_expand_16");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   png_ptr->transformations |= (PNG_EXPAND_16 | PNG_EXPAND | PNG_EXPAND_tRNS);
-}
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-void PNGAPI
-png_set_gray_to_rgb(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_gray_to_rgb");
-
-   if (png_rtran_ok(png_ptr, 0) == 0)
-      return;
-
-   /* Because rgb must be 8 bits or more: */
-   png_set_expand_gray_1_2_4_to_8(png_ptr);
-   png_ptr->transformations |= PNG_GRAY_TO_RGB;
-}
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-void PNGFAPI
-png_set_rgb_to_gray_fixed(png_structrp png_ptr, int error_action,
-    png_fixed_point red, png_fixed_point green)
-{
-   png_debug(1, "in png_set_rgb_to_gray_fixed");
-
-   /* Need the IHDR here because of the check on color_type below. */
-   /* TODO: fix this */
-   if (png_rtran_ok(png_ptr, 1) == 0)
-      return;
-
-   switch (error_action)
-   {
-      case PNG_ERROR_ACTION_NONE:
-         png_ptr->transformations |= PNG_RGB_TO_GRAY;
-         break;
-
-      case PNG_ERROR_ACTION_WARN:
-         png_ptr->transformations |= PNG_RGB_TO_GRAY_WARN;
-         break;
-
-      case PNG_ERROR_ACTION_ERROR:
-         png_ptr->transformations |= PNG_RGB_TO_GRAY_ERR;
-         break;
-
-      default:
-         png_error(png_ptr, "invalid error action to rgb_to_gray");
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-#ifdef PNG_READ_EXPAND_SUPPORTED
-      png_ptr->transformations |= PNG_EXPAND;
-#else
-   {
-      /* Make this an error in 1.6 because otherwise the application may assume
-       * that it just worked and get a memory overwrite.
-       */
-      png_error(png_ptr,
-          "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED");
-
-      /* png_ptr->transformations &= ~PNG_RGB_TO_GRAY; */
-   }
-#endif
-   {
-      if (red >= 0 && green >= 0 && red + green <= PNG_FP_1)
-      {
-         png_uint_16 red_int, green_int;
-
-         /* NOTE: this calculation does not round, but this behavior is retained
-          * for consistency; the inaccuracy is very small.  The code here always
-          * overwrites the coefficients, regardless of whether they have been
-          * defaulted or set already.
-          */
-         red_int = (png_uint_16)(((png_uint_32)red*32768)/100000);
-         green_int = (png_uint_16)(((png_uint_32)green*32768)/100000);
-
-         png_ptr->rgb_to_gray_red_coeff   = red_int;
-         png_ptr->rgb_to_gray_green_coeff = green_int;
-         png_ptr->rgb_to_gray_coefficients_set = 1;
-      }
-
-      else
-      {
-         if (red >= 0 && green >= 0)
-            png_app_warning(png_ptr,
-                "ignoring out of range rgb_to_gray coefficients");
-
-         /* Use the defaults, from the cHRM chunk if set, else the historical
-          * values which are close to the sRGB/HDTV/ITU-Rec 709 values.  See
-          * png_do_rgb_to_gray for more discussion of the values.  In this case
-          * the coefficients are not marked as 'set' and are not overwritten if
-          * something has already provided a default.
-          */
-         if (png_ptr->rgb_to_gray_red_coeff == 0 &&
-             png_ptr->rgb_to_gray_green_coeff == 0)
-         {
-            png_ptr->rgb_to_gray_red_coeff   = 6968;
-            png_ptr->rgb_to_gray_green_coeff = 23434;
-            /* png_ptr->rgb_to_gray_blue_coeff  = 2366; */
-         }
-      }
-   }
-}
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-/* Convert a RGB image to a grayscale of the same width.  This allows us,
- * for example, to convert a 24 bpp RGB image into an 8 bpp grayscale image.
- */
-
-void PNGAPI
-png_set_rgb_to_gray(png_structrp png_ptr, int error_action, double red,
-    double green)
-{
-   png_set_rgb_to_gray_fixed(png_ptr, error_action,
-       png_fixed(png_ptr, red, "rgb to gray red coefficient"),
-      png_fixed(png_ptr, green, "rgb to gray green coefficient"));
-}
-#endif /* FLOATING POINT */
-
-#endif /* RGB_TO_GRAY */
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-void PNGAPI
-png_set_read_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr
-    read_user_transform_fn)
-{
-   png_debug(1, "in png_set_read_user_transform_fn");
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   png_ptr->transformations |= PNG_USER_TRANSFORM;
-   png_ptr->read_user_transform_fn = read_user_transform_fn;
-#endif
-}
-#endif
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-#ifdef PNG_READ_GAMMA_SUPPORTED
-/* In the case of gamma transformations only do transformations on images where
- * the [file] gamma and screen_gamma are not close reciprocals, otherwise it
- * slows things down slightly, and also needlessly introduces small errors.
- */
-static int /* PRIVATE */
-png_gamma_threshold(png_fixed_point screen_gamma, png_fixed_point file_gamma)
-{
-   /* PNG_GAMMA_THRESHOLD is the threshold for performing gamma
-    * correction as a difference of the overall transform from 1.0
-    *
-    * We want to compare the threshold with s*f - 1, if we get
-    * overflow here it is because of wacky gamma values so we
-    * turn on processing anyway.
-    */
-   png_fixed_point gtest;
-   return !png_muldiv(&gtest, screen_gamma, file_gamma, PNG_FP_1) ||
-       png_gamma_significant(gtest);
-}
-#endif
-
-/* Initialize everything needed for the read.  This includes modifying
- * the palette.
- */
-
-/* For the moment 'png_init_palette_transformations' and
- * 'png_init_rgb_transformations' only do some flag canceling optimizations.
- * The intent is that these two routines should have palette or rgb operations
- * extracted from 'png_init_read_transformations'.
- */
-static void /* PRIVATE */
-png_init_palette_transformations(png_structrp png_ptr)
-{
-   /* Called to handle the (input) palette case.  In png_do_read_transformations
-    * the first step is to expand the palette if requested, so this code must
-    * take care to only make changes that are invariant with respect to the
-    * palette expansion, or only do them if there is no expansion.
-    *
-    * STRIP_ALPHA has already been handled in the caller (by setting num_trans
-    * to 0.)
-    */
-   int input_has_alpha = 0;
-   int input_has_transparency = 0;
-
-   if (png_ptr->num_trans > 0)
-   {
-      int i;
-
-      /* Ignore if all the entries are opaque (unlikely!) */
-      for (i=0; i<png_ptr->num_trans; ++i)
-      {
-         if (png_ptr->trans_alpha[i] == 255)
-            continue;
-         else if (png_ptr->trans_alpha[i] == 0)
-            input_has_transparency = 1;
-         else
-         {
-            input_has_transparency = 1;
-            input_has_alpha = 1;
-            break;
-         }
-      }
-   }
-
-   /* If no alpha we can optimize. */
-   if (input_has_alpha == 0)
-   {
-      /* Any alpha means background and associative alpha processing is
-       * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA
-       * and ENCODE_ALPHA are irrelevant.
-       */
-      png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-      png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-
-      if (input_has_transparency == 0)
-         png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND);
-   }
-
-#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* png_set_background handling - deals with the complexity of whether the
-    * background color is in the file format or the screen format in the case
-    * where an 'expand' will happen.
-    */
-
-   /* The following code cannot be entered in the alpha pre-multiplication case
-    * because PNG_BACKGROUND_EXPAND is cancelled below.
-    */
-   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 &&
-       (png_ptr->transformations & PNG_EXPAND) != 0)
-   {
-      {
-         png_ptr->background.red   =
-             png_ptr->palette[png_ptr->background.index].red;
-         png_ptr->background.green =
-             png_ptr->palette[png_ptr->background.index].green;
-         png_ptr->background.blue  =
-             png_ptr->palette[png_ptr->background.index].blue;
-
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-         if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
-         {
-            if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
-            {
-               /* Invert the alpha channel (in tRNS) unless the pixels are
-                * going to be expanded, in which case leave it for later
-                */
-               int i, istop = png_ptr->num_trans;
-
-               for (i = 0; i < istop; i++)
-                  png_ptr->trans_alpha[i] =
-                      (png_byte)(255 - png_ptr->trans_alpha[i]);
-            }
-         }
-#endif /* READ_INVERT_ALPHA */
-      }
-   } /* background expand and (therefore) no alpha association. */
-#endif /* READ_EXPAND && READ_BACKGROUND */
-}
-
-static void /* PRIVATE */
-png_init_rgb_transformations(png_structrp png_ptr)
-{
-   /* Added to libpng-1.5.4: check the color type to determine whether there
-    * is any alpha or transparency in the image and simply cancel the
-    * background and alpha mode stuff if there isn't.
-    */
-   int input_has_alpha = (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0;
-   int input_has_transparency = png_ptr->num_trans > 0;
-
-   /* If no alpha we can optimize. */
-   if (input_has_alpha == 0)
-   {
-      /* Any alpha means background and associative alpha processing is
-       * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA
-       * and ENCODE_ALPHA are irrelevant.
-       */
-#     ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-         png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-         png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-#     endif
-
-      if (input_has_transparency == 0)
-         png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND);
-   }
-
-#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* png_set_background handling - deals with the complexity of whether the
-    * background color is in the file format or the screen format in the case
-    * where an 'expand' will happen.
-    */
-
-   /* The following code cannot be entered in the alpha pre-multiplication case
-    * because PNG_BACKGROUND_EXPAND is cancelled below.
-    */
-   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 &&
-       (png_ptr->transformations & PNG_EXPAND) != 0 &&
-       (png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
-       /* i.e., GRAY or GRAY_ALPHA */
-   {
-      {
-         /* Expand background and tRNS chunks */
-         int gray = png_ptr->background.gray;
-         int trans_gray = png_ptr->trans_color.gray;
-
-         switch (png_ptr->bit_depth)
-         {
-            case 1:
-               gray *= 0xff;
-               trans_gray *= 0xff;
-               break;
-
-            case 2:
-               gray *= 0x55;
-               trans_gray *= 0x55;
-               break;
-
-            case 4:
-               gray *= 0x11;
-               trans_gray *= 0x11;
-               break;
-
-            default:
-
-            case 8:
-               /* FALLTHROUGH */ /*  (Already 8 bits) */
-
-            case 16:
-               /* Already a full 16 bits */
-               break;
-         }
-
-         png_ptr->background.red = png_ptr->background.green =
-            png_ptr->background.blue = (png_uint_16)gray;
-
-         if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
-         {
-            png_ptr->trans_color.red = png_ptr->trans_color.green =
-               png_ptr->trans_color.blue = (png_uint_16)trans_gray;
-         }
-      }
-   } /* background expand and (therefore) no alpha association. */
-#endif /* READ_EXPAND && READ_BACKGROUND */
-}
-
-void /* PRIVATE */
-png_init_read_transformations(png_structrp png_ptr)
-{
-   png_debug(1, "in png_init_read_transformations");
-
-   /* This internal function is called from png_read_start_row in pngrutil.c
-    * and it is called before the 'rowbytes' calculation is done, so the code
-    * in here can change or update the transformations flags.
-    *
-    * First do updates that do not depend on the details of the PNG image data
-    * being processed.
-    */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   /* Prior to 1.5.4 these tests were performed from png_set_gamma, 1.5.4 adds
-    * png_set_alpha_mode and this is another source for a default file gamma so
-    * the test needs to be performed later - here.  In addition prior to 1.5.4
-    * the tests were repeated for the PALETTE color type here - this is no
-    * longer necessary (and doesn't seem to have been necessary before.)
-    */
-   {
-      /* The following temporary indicates if overall gamma correction is
-       * required.
-       */
-      int gamma_correction = 0;
-
-      if (png_ptr->colorspace.gamma != 0) /* has been set */
-      {
-         if (png_ptr->screen_gamma != 0) /* screen set too */
-            gamma_correction = png_gamma_threshold(png_ptr->colorspace.gamma,
-                png_ptr->screen_gamma);
-
-         else
-            /* Assume the output matches the input; a long time default behavior
-             * of libpng, although the standard has nothing to say about this.
-             */
-            png_ptr->screen_gamma = png_reciprocal(png_ptr->colorspace.gamma);
-      }
-
-      else if (png_ptr->screen_gamma != 0)
-         /* The converse - assume the file matches the screen, note that this
-          * perhaps undesirable default can (from 1.5.4) be changed by calling
-          * png_set_alpha_mode (even if the alpha handling mode isn't required
-          * or isn't changed from the default.)
-          */
-         png_ptr->colorspace.gamma = png_reciprocal(png_ptr->screen_gamma);
-
-      else /* neither are set */
-         /* Just in case the following prevents any processing - file and screen
-          * are both assumed to be linear and there is no way to introduce a
-          * third gamma value other than png_set_background with 'UNIQUE', and,
-          * prior to 1.5.4
-          */
-         png_ptr->screen_gamma = png_ptr->colorspace.gamma = PNG_FP_1;
-
-      /* We have a gamma value now. */
-      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
-
-      /* Now turn the gamma transformation on or off as appropriate.  Notice
-       * that PNG_GAMMA just refers to the file->screen correction.  Alpha
-       * composition may independently cause gamma correction because it needs
-       * linear data (e.g. if the file has a gAMA chunk but the screen gamma
-       * hasn't been specified.)  In any case this flag may get turned off in
-       * the code immediately below if the transform can be handled outside the
-       * row loop.
-       */
-      if (gamma_correction != 0)
-         png_ptr->transformations |= PNG_GAMMA;
-
-      else
-         png_ptr->transformations &= ~PNG_GAMMA;
-   }
-#endif
-
-   /* Certain transformations have the effect of preventing other
-    * transformations that happen afterward in png_do_read_transformations;
-    * resolve the interdependencies here.  From the code of
-    * png_do_read_transformations the order is:
-    *
-    *  1) PNG_EXPAND (including PNG_EXPAND_tRNS)
-    *  2) PNG_STRIP_ALPHA (if no compose)
-    *  3) PNG_RGB_TO_GRAY
-    *  4) PNG_GRAY_TO_RGB iff !PNG_BACKGROUND_IS_GRAY
-    *  5) PNG_COMPOSE
-    *  6) PNG_GAMMA
-    *  7) PNG_STRIP_ALPHA (if compose)
-    *  8) PNG_ENCODE_ALPHA
-    *  9) PNG_SCALE_16_TO_8
-    * 10) PNG_16_TO_8
-    * 11) PNG_QUANTIZE (converts to palette)
-    * 12) PNG_EXPAND_16
-    * 13) PNG_GRAY_TO_RGB iff PNG_BACKGROUND_IS_GRAY
-    * 14) PNG_INVERT_MONO
-    * 15) PNG_INVERT_ALPHA
-    * 16) PNG_SHIFT
-    * 17) PNG_PACK
-    * 18) PNG_BGR
-    * 19) PNG_PACKSWAP
-    * 20) PNG_FILLER (includes PNG_ADD_ALPHA)
-    * 21) PNG_SWAP_ALPHA
-    * 22) PNG_SWAP_BYTES
-    * 23) PNG_USER_TRANSFORM [must be last]
-    */
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
-       (png_ptr->transformations & PNG_COMPOSE) == 0)
-   {
-      /* Stripping the alpha channel happens immediately after the 'expand'
-       * transformations, before all other transformation, so it cancels out
-       * the alpha handling.  It has the side effect negating the effect of
-       * PNG_EXPAND_tRNS too:
-       */
-      png_ptr->transformations &= ~(PNG_BACKGROUND_EXPAND | PNG_ENCODE_ALPHA |
-         PNG_EXPAND_tRNS);
-      png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-
-      /* Kill the tRNS chunk itself too.  Prior to 1.5.4 this did not happen
-       * so transparency information would remain just so long as it wasn't
-       * expanded.  This produces unexpected API changes if the set of things
-       * that do PNG_EXPAND_tRNS changes (perfectly possible given the
-       * documentation - which says ask for what you want, accept what you
-       * get.)  This makes the behavior consistent from 1.5.4:
-       */
-      png_ptr->num_trans = 0;
-   }
-#endif /* STRIP_ALPHA supported, no COMPOSE */
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-   /* If the screen gamma is about 1.0 then the OPTIMIZE_ALPHA and ENCODE_ALPHA
-    * settings will have no effect.
-    */
-   if (png_gamma_significant(png_ptr->screen_gamma) == 0)
-   {
-      png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
-      png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
-   }
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   /* Make sure the coefficients for the rgb to gray conversion are set
-    * appropriately.
-    */
-   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
-      png_colorspace_set_rgb_coefficients(png_ptr);
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* Detect gray background and attempt to enable optimization for
-    * gray --> RGB case.
-    *
-    * Note:  if PNG_BACKGROUND_EXPAND is set and color_type is either RGB or
-    * RGB_ALPHA (in which case need_expand is superfluous anyway), the
-    * background color might actually be gray yet not be flagged as such.
-    * This is not a problem for the current code, which uses
-    * PNG_BACKGROUND_IS_GRAY only to decide when to do the
-    * png_do_gray_to_rgb() transformation.
-    *
-    * TODO: this code needs to be revised to avoid the complexity and
-    * interdependencies.  The color type of the background should be recorded in
-    * png_set_background, along with the bit depth, then the code has a record
-    * of exactly what color space the background is currently in.
-    */
-   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0)
-   {
-      /* PNG_BACKGROUND_EXPAND: the background is in the file color space, so if
-       * the file was grayscale the background value is gray.
-       */
-      if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
-         png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
-   }
-
-   else if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-   {
-      /* PNG_COMPOSE: png_set_background was called with need_expand false,
-       * so the color is in the color space of the output or png_set_alpha_mode
-       * was called and the color is black.  Ignore RGB_TO_GRAY because that
-       * happens before GRAY_TO_RGB.
-       */
-      if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
-      {
-         if (png_ptr->background.red == png_ptr->background.green &&
-             png_ptr->background.red == png_ptr->background.blue)
-         {
-            png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
-            png_ptr->background.gray = png_ptr->background.red;
-         }
-      }
-   }
-#endif /* READ_EXPAND && READ_BACKGROUND */
-#endif /* READ_GRAY_TO_RGB */
-
-   /* For indexed PNG data (PNG_COLOR_TYPE_PALETTE) many of the transformations
-    * can be performed directly on the palette, and some (such as rgb to gray)
-    * can be optimized inside the palette.  This is particularly true of the
-    * composite (background and alpha) stuff, which can be pretty much all done
-    * in the palette even if the result is expanded to RGB or gray afterward.
-    *
-    * NOTE: this is Not Yet Implemented, the code behaves as in 1.5.1 and
-    * earlier and the palette stuff is actually handled on the first row.  This
-    * leads to the reported bug that the palette returned by png_get_PLTE is not
-    * updated.
-    */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      png_init_palette_transformations(png_ptr);
-
-   else
-      png_init_rgb_transformations(png_ptr);
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
-   defined(PNG_READ_EXPAND_16_SUPPORTED)
-   if ((png_ptr->transformations & PNG_EXPAND_16) != 0 &&
-       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
-       (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 &&
-       png_ptr->bit_depth != 16)
-   {
-      /* TODO: fix this.  Because the expand_16 operation is after the compose
-       * handling the background color must be 8, not 16, bits deep, but the
-       * application will supply a 16-bit value so reduce it here.
-       *
-       * The PNG_BACKGROUND_EXPAND code above does not expand to 16 bits at
-       * present, so that case is ok (until do_expand_16 is moved.)
-       *
-       * NOTE: this discards the low 16 bits of the user supplied background
-       * color, but until expand_16 works properly there is no choice!
-       */
-#     define CHOP(x) (x)=((png_uint_16)PNG_DIV257(x))
-      CHOP(png_ptr->background.red);
-      CHOP(png_ptr->background.green);
-      CHOP(png_ptr->background.blue);
-      CHOP(png_ptr->background.gray);
-#     undef CHOP
-   }
-#endif /* READ_BACKGROUND && READ_EXPAND_16 */
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
-   (defined(PNG_READ_SCALE_16_TO_8_SUPPORTED) || \
-   defined(PNG_READ_STRIP_16_TO_8_SUPPORTED))
-   if ((png_ptr->transformations & (PNG_16_TO_8|PNG_SCALE_16_TO_8)) != 0 &&
-       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
-       (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 &&
-       png_ptr->bit_depth == 16)
-   {
-      /* On the other hand, if a 16-bit file is to be reduced to 8-bits per
-       * component this will also happen after PNG_COMPOSE and so the background
-       * color must be pre-expanded here.
-       *
-       * TODO: fix this too.
-       */
-      png_ptr->background.red = (png_uint_16)(png_ptr->background.red * 257);
-      png_ptr->background.green =
-         (png_uint_16)(png_ptr->background.green * 257);
-      png_ptr->background.blue = (png_uint_16)(png_ptr->background.blue * 257);
-      png_ptr->background.gray = (png_uint_16)(png_ptr->background.gray * 257);
-   }
-#endif
-
-   /* NOTE: below 'PNG_READ_ALPHA_MODE_SUPPORTED' is presumed to also enable the
-    * background support (see the comments in scripts/pnglibconf.dfa), this
-    * allows pre-multiplication of the alpha channel to be implemented as
-    * compositing on black.  This is probably sub-optimal and has been done in
-    * 1.5.4 betas simply to enable external critique and testing (i.e. to
-    * implement the new API quickly, without lots of internal changes.)
-    */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-#  ifdef PNG_READ_BACKGROUND_SUPPORTED
-      /* Includes ALPHA_MODE */
-      png_ptr->background_1 = png_ptr->background;
-#  endif
-
-   /* This needs to change - in the palette image case a whole set of tables are
-    * built when it would be quicker to just calculate the correct value for
-    * each palette entry directly.  Also, the test is too tricky - why check
-    * PNG_RGB_TO_GRAY if PNG_GAMMA is not set?  The answer seems to be that
-    * PNG_GAMMA is cancelled even if the gamma is known?  The test excludes the
-    * PNG_COMPOSE case, so apparently if there is no *overall* gamma correction
-    * the gamma tables will not be built even if composition is required on a
-    * gamma encoded value.
-    *
-    * In 1.5.4 this is addressed below by an additional check on the individual
-    * file gamma - if it is not 1.0 both RGB_TO_GRAY and COMPOSE need the
-    * tables.
-    */
-   if ((png_ptr->transformations & PNG_GAMMA) != 0 ||
-       ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0 &&
-        (png_gamma_significant(png_ptr->colorspace.gamma) != 0 ||
-         png_gamma_significant(png_ptr->screen_gamma) != 0)) ||
-        ((png_ptr->transformations & PNG_COMPOSE) != 0 &&
-         (png_gamma_significant(png_ptr->colorspace.gamma) != 0 ||
-          png_gamma_significant(png_ptr->screen_gamma) != 0
-#  ifdef PNG_READ_BACKGROUND_SUPPORTED
-         || (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_UNIQUE &&
-           png_gamma_significant(png_ptr->background_gamma) != 0)
-#  endif
-        )) || ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 &&
-       png_gamma_significant(png_ptr->screen_gamma) != 0))
-   {
-      png_build_gamma_table(png_ptr, png_ptr->bit_depth);
-
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-      if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-      {
-         /* Issue a warning about this combination: because RGB_TO_GRAY is
-          * optimized to do the gamma transform if present yet do_background has
-          * to do the same thing if both options are set a
-          * double-gamma-correction happens.  This is true in all versions of
-          * libpng to date.
-          */
-         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
-            png_warning(png_ptr,
-                "libpng does not support gamma+background+rgb_to_gray");
-
-         if ((png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) != 0)
-         {
-            /* We don't get to here unless there is a tRNS chunk with non-opaque
-             * entries - see the checking code at the start of this function.
-             */
-            png_color back, back_1;
-            png_colorp palette = png_ptr->palette;
-            int num_palette = png_ptr->num_palette;
-            int i;
-            if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
-            {
-
-               back.red = png_ptr->gamma_table[png_ptr->background.red];
-               back.green = png_ptr->gamma_table[png_ptr->background.green];
-               back.blue = png_ptr->gamma_table[png_ptr->background.blue];
-
-               back_1.red = png_ptr->gamma_to_1[png_ptr->background.red];
-               back_1.green = png_ptr->gamma_to_1[png_ptr->background.green];
-               back_1.blue = png_ptr->gamma_to_1[png_ptr->background.blue];
-            }
-            else
-            {
-               png_fixed_point g, gs;
-
-               switch (png_ptr->background_gamma_type)
-               {
-                  case PNG_BACKGROUND_GAMMA_SCREEN:
-                     g = (png_ptr->screen_gamma);
-                     gs = PNG_FP_1;
-                     break;
-
-                  case PNG_BACKGROUND_GAMMA_FILE:
-                     g = png_reciprocal(png_ptr->colorspace.gamma);
-                     gs = png_reciprocal2(png_ptr->colorspace.gamma,
-                         png_ptr->screen_gamma);
-                     break;
-
-                  case PNG_BACKGROUND_GAMMA_UNIQUE:
-                     g = png_reciprocal(png_ptr->background_gamma);
-                     gs = png_reciprocal2(png_ptr->background_gamma,
-                         png_ptr->screen_gamma);
-                     break;
-                  default:
-                     g = PNG_FP_1;    /* back_1 */
-                     gs = PNG_FP_1;   /* back */
-                     break;
-               }
-
-               if (png_gamma_significant(gs) != 0)
-               {
-                  back.red = png_gamma_8bit_correct(png_ptr->background.red,
-                      gs);
-                  back.green = png_gamma_8bit_correct(png_ptr->background.green,
-                      gs);
-                  back.blue = png_gamma_8bit_correct(png_ptr->background.blue,
-                      gs);
-               }
-
-               else
-               {
-                  back.red   = (png_byte)png_ptr->background.red;
-                  back.green = (png_byte)png_ptr->background.green;
-                  back.blue  = (png_byte)png_ptr->background.blue;
-               }
-
-               if (png_gamma_significant(g) != 0)
-               {
-                  back_1.red = png_gamma_8bit_correct(png_ptr->background.red,
-                      g);
-                  back_1.green = png_gamma_8bit_correct(
-                      png_ptr->background.green, g);
-                  back_1.blue = png_gamma_8bit_correct(png_ptr->background.blue,
-                      g);
-               }
-
-               else
-               {
-                  back_1.red   = (png_byte)png_ptr->background.red;
-                  back_1.green = (png_byte)png_ptr->background.green;
-                  back_1.blue  = (png_byte)png_ptr->background.blue;
-               }
-            }
-
-            for (i = 0; i < num_palette; i++)
-            {
-               if (i < (int)png_ptr->num_trans &&
-                   png_ptr->trans_alpha[i] != 0xff)
-               {
-                  if (png_ptr->trans_alpha[i] == 0)
-                  {
-                     palette[i] = back;
-                  }
-                  else /* if (png_ptr->trans_alpha[i] != 0xff) */
-                  {
-                     png_byte v, w;
-
-                     v = png_ptr->gamma_to_1[palette[i].red];
-                     png_composite(w, v, png_ptr->trans_alpha[i], back_1.red);
-                     palette[i].red = png_ptr->gamma_from_1[w];
-
-                     v = png_ptr->gamma_to_1[palette[i].green];
-                     png_composite(w, v, png_ptr->trans_alpha[i], back_1.green);
-                     palette[i].green = png_ptr->gamma_from_1[w];
-
-                     v = png_ptr->gamma_to_1[palette[i].blue];
-                     png_composite(w, v, png_ptr->trans_alpha[i], back_1.blue);
-                     palette[i].blue = png_ptr->gamma_from_1[w];
-                  }
-               }
-               else
-               {
-                  palette[i].red = png_ptr->gamma_table[palette[i].red];
-                  palette[i].green = png_ptr->gamma_table[palette[i].green];
-                  palette[i].blue = png_ptr->gamma_table[palette[i].blue];
-               }
-            }
-
-            /* Prevent the transformations being done again.
-             *
-             * NOTE: this is highly dubious; it removes the transformations in
-             * place.  This seems inconsistent with the general treatment of the
-             * transformations elsewhere.
-             */
-            png_ptr->transformations &= ~(PNG_COMPOSE | PNG_GAMMA);
-         } /* color_type == PNG_COLOR_TYPE_PALETTE */
-
-         /* if (png_ptr->background_gamma_type!=PNG_BACKGROUND_GAMMA_UNKNOWN) */
-         else /* color_type != PNG_COLOR_TYPE_PALETTE */
-         {
-            int gs_sig, g_sig;
-            png_fixed_point g = PNG_FP_1;  /* Correction to linear */
-            png_fixed_point gs = PNG_FP_1; /* Correction to screen */
-
-            switch (png_ptr->background_gamma_type)
-            {
-               case PNG_BACKGROUND_GAMMA_SCREEN:
-                  g = png_ptr->screen_gamma;
-                  /* gs = PNG_FP_1; */
-                  break;
-
-               case PNG_BACKGROUND_GAMMA_FILE:
-                  g = png_reciprocal(png_ptr->colorspace.gamma);
-                  gs = png_reciprocal2(png_ptr->colorspace.gamma,
-                      png_ptr->screen_gamma);
-                  break;
-
-               case PNG_BACKGROUND_GAMMA_UNIQUE:
-                  g = png_reciprocal(png_ptr->background_gamma);
-                  gs = png_reciprocal2(png_ptr->background_gamma,
-                      png_ptr->screen_gamma);
-                  break;
-
-               default:
-                  png_error(png_ptr, "invalid background gamma type");
-            }
-
-            g_sig = png_gamma_significant(g);
-            gs_sig = png_gamma_significant(gs);
-
-            if (g_sig != 0)
-               png_ptr->background_1.gray = png_gamma_correct(png_ptr,
-                   png_ptr->background.gray, g);
-
-            if (gs_sig != 0)
-               png_ptr->background.gray = png_gamma_correct(png_ptr,
-                   png_ptr->background.gray, gs);
-
-            if ((png_ptr->background.red != png_ptr->background.green) ||
-                (png_ptr->background.red != png_ptr->background.blue) ||
-                (png_ptr->background.red != png_ptr->background.gray))
-            {
-               /* RGB or RGBA with color background */
-               if (g_sig != 0)
-               {
-                  png_ptr->background_1.red = png_gamma_correct(png_ptr,
-                      png_ptr->background.red, g);
-
-                  png_ptr->background_1.green = png_gamma_correct(png_ptr,
-                      png_ptr->background.green, g);
-
-                  png_ptr->background_1.blue = png_gamma_correct(png_ptr,
-                      png_ptr->background.blue, g);
-               }
-
-               if (gs_sig != 0)
-               {
-                  png_ptr->background.red = png_gamma_correct(png_ptr,
-                      png_ptr->background.red, gs);
-
-                  png_ptr->background.green = png_gamma_correct(png_ptr,
-                      png_ptr->background.green, gs);
-
-                  png_ptr->background.blue = png_gamma_correct(png_ptr,
-                      png_ptr->background.blue, gs);
-               }
-            }
-
-            else
-            {
-               /* GRAY, GRAY ALPHA, RGB, or RGBA with gray background */
-               png_ptr->background_1.red = png_ptr->background_1.green
-                   = png_ptr->background_1.blue = png_ptr->background_1.gray;
-
-               png_ptr->background.red = png_ptr->background.green
-                   = png_ptr->background.blue = png_ptr->background.gray;
-            }
-
-            /* The background is now in screen gamma: */
-            png_ptr->background_gamma_type = PNG_BACKGROUND_GAMMA_SCREEN;
-         } /* color_type != PNG_COLOR_TYPE_PALETTE */
-      }/* png_ptr->transformations & PNG_BACKGROUND */
-
-      else
-      /* Transformation does not include PNG_BACKGROUND */
-#endif /* READ_BACKGROUND */
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-         /* RGB_TO_GRAY needs to have non-gamma-corrected values! */
-         && ((png_ptr->transformations & PNG_EXPAND) == 0 ||
-         (png_ptr->transformations & PNG_RGB_TO_GRAY) == 0)
-#endif
-         )
-      {
-         png_colorp palette = png_ptr->palette;
-         int num_palette = png_ptr->num_palette;
-         int i;
-
-         /* NOTE: there are other transformations that should probably be in
-          * here too.
-          */
-         for (i = 0; i < num_palette; i++)
-         {
-            palette[i].red = png_ptr->gamma_table[palette[i].red];
-            palette[i].green = png_ptr->gamma_table[palette[i].green];
-            palette[i].blue = png_ptr->gamma_table[palette[i].blue];
-         }
-
-         /* Done the gamma correction. */
-         png_ptr->transformations &= ~PNG_GAMMA;
-      } /* color_type == PALETTE && !PNG_BACKGROUND transformation */
-   }
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-   else
-#endif
-#endif /* READ_GAMMA */
-
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-   /* No GAMMA transformation (see the hanging else 4 lines above) */
-   if ((png_ptr->transformations & PNG_COMPOSE) != 0 &&
-       (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE))
-   {
-      int i;
-      int istop = (int)png_ptr->num_trans;
-      png_color back;
-      png_colorp palette = png_ptr->palette;
-
-      back.red   = (png_byte)png_ptr->background.red;
-      back.green = (png_byte)png_ptr->background.green;
-      back.blue  = (png_byte)png_ptr->background.blue;
-
-      for (i = 0; i < istop; i++)
-      {
-         if (png_ptr->trans_alpha[i] == 0)
-         {
-            palette[i] = back;
-         }
-
-         else if (png_ptr->trans_alpha[i] != 0xff)
-         {
-            /* The png_composite() macro is defined in png.h */
-            png_composite(palette[i].red, palette[i].red,
-                png_ptr->trans_alpha[i], back.red);
-
-            png_composite(palette[i].green, palette[i].green,
-                png_ptr->trans_alpha[i], back.green);
-
-            png_composite(palette[i].blue, palette[i].blue,
-                png_ptr->trans_alpha[i], back.blue);
-         }
-      }
-
-      png_ptr->transformations &= ~PNG_COMPOSE;
-   }
-#endif /* READ_BACKGROUND */
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-   if ((png_ptr->transformations & PNG_SHIFT) != 0 &&
-       (png_ptr->transformations & PNG_EXPAND) == 0 &&
-       (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE))
-   {
-      int i;
-      int istop = png_ptr->num_palette;
-      int shift = 8 - png_ptr->sig_bit.red;
-
-      png_ptr->transformations &= ~PNG_SHIFT;
-
-      /* significant bits can be in the range 1 to 7 for a meaningful result, if
-       * the number of significant bits is 0 then no shift is done (this is an
-       * error condition which is silently ignored.)
-       */
-      if (shift > 0 && shift < 8)
-         for (i=0; i<istop; ++i)
-         {
-            int component = png_ptr->palette[i].red;
-
-            component >>= shift;
-            png_ptr->palette[i].red = (png_byte)component;
-         }
-
-      shift = 8 - png_ptr->sig_bit.green;
-      if (shift > 0 && shift < 8)
-         for (i=0; i<istop; ++i)
-         {
-            int component = png_ptr->palette[i].green;
-
-            component >>= shift;
-            png_ptr->palette[i].green = (png_byte)component;
-         }
-
-      shift = 8 - png_ptr->sig_bit.blue;
-      if (shift > 0 && shift < 8)
-         for (i=0; i<istop; ++i)
-         {
-            int component = png_ptr->palette[i].blue;
-
-            component >>= shift;
-            png_ptr->palette[i].blue = (png_byte)component;
-         }
-   }
-#endif /* READ_SHIFT */
-}
-
-/* Modify the info structure to reflect the transformations.  The
- * info should be updated so a PNG file could be written with it,
- * assuming the transformations result in valid PNG data.
- */
-void /* PRIVATE */
-png_read_transform_info(png_structrp png_ptr, png_inforp info_ptr)
-{
-   png_debug(1, "in png_read_transform_info");
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   if ((png_ptr->transformations & PNG_EXPAND) != 0)
-   {
-      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         /* This check must match what actually happens in
-          * png_do_expand_palette; if it ever checks the tRNS chunk to see if
-          * it is all opaque we must do the same (at present it does not.)
-          */
-         if (png_ptr->num_trans > 0)
-            info_ptr->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
-
-         else
-            info_ptr->color_type = PNG_COLOR_TYPE_RGB;
-
-         info_ptr->bit_depth = 8;
-         info_ptr->num_trans = 0;
-
-         if (png_ptr->palette == NULL)
-            png_error (png_ptr, "Palette is NULL in indexed image");
-      }
-      else
-      {
-         if (png_ptr->num_trans != 0)
-         {
-            if ((png_ptr->transformations & PNG_EXPAND_tRNS) != 0)
-               info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
-         }
-         if (info_ptr->bit_depth < 8)
-            info_ptr->bit_depth = 8;
-
-         info_ptr->num_trans = 0;
-      }
-   }
-#endif
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   /* The following is almost certainly wrong unless the background value is in
-    * the screen space!
-    */
-   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-      info_ptr->background = png_ptr->background;
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   /* The following used to be conditional on PNG_GAMMA (prior to 1.5.4),
-    * however it seems that the code in png_init_read_transformations, which has
-    * been called before this from png_read_update_info->png_read_start_row
-    * sometimes does the gamma transform and cancels the flag.
-    *
-    * TODO: this looks wrong; the info_ptr should end up with a gamma equal to
-    * the screen_gamma value.  The following probably results in weirdness if
-    * the info_ptr is used by the app after the rows have been read.
-    */
-   info_ptr->colorspace.gamma = png_ptr->colorspace.gamma;
-#endif
-
-   if (info_ptr->bit_depth == 16)
-   {
-#  ifdef PNG_READ_16BIT_SUPPORTED
-#     ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-         if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0)
-            info_ptr->bit_depth = 8;
-#     endif
-
-#     ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-         if ((png_ptr->transformations & PNG_16_TO_8) != 0)
-            info_ptr->bit_depth = 8;
-#     endif
-
-#  else
-      /* No 16-bit support: force chopping 16-bit input down to 8, in this case
-       * the app program can chose if both APIs are available by setting the
-       * correct scaling to use.
-       */
-#     ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-         /* For compatibility with previous versions use the strip method by
-          * default.  This code works because if PNG_SCALE_16_TO_8 is already
-          * set the code below will do that in preference to the chop.
-          */
-         png_ptr->transformations |= PNG_16_TO_8;
-         info_ptr->bit_depth = 8;
-#     else
-
-#        ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-            png_ptr->transformations |= PNG_SCALE_16_TO_8;
-            info_ptr->bit_depth = 8;
-#        else
-
-            CONFIGURATION ERROR: you must enable at least one 16 to 8 method
-#        endif
-#    endif
-#endif /* !READ_16BIT */
-   }
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
-      info_ptr->color_type = (png_byte)(info_ptr->color_type |
-         PNG_COLOR_MASK_COLOR);
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
-      info_ptr->color_type = (png_byte)(info_ptr->color_type &
-         ~PNG_COLOR_MASK_COLOR);
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   if ((png_ptr->transformations & PNG_QUANTIZE) != 0)
-   {
-      if (((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
-          (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)) &&
-          png_ptr->palette_lookup != 0 && info_ptr->bit_depth == 8)
-      {
-         info_ptr->color_type = PNG_COLOR_TYPE_PALETTE;
-      }
-   }
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   if ((png_ptr->transformations & PNG_EXPAND_16) != 0 &&
-       info_ptr->bit_depth == 8 &&
-       info_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      info_ptr->bit_depth = 16;
-   }
-#endif
-
-#ifdef PNG_READ_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) != 0 &&
-       (info_ptr->bit_depth < 8))
-      info_ptr->bit_depth = 8;
-#endif
-
-   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      info_ptr->channels = 1;
-
-   else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      info_ptr->channels = 3;
-
-   else
-      info_ptr->channels = 1;
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0)
-   {
-      info_ptr->color_type = (png_byte)(info_ptr->color_type &
-         ~PNG_COLOR_MASK_ALPHA);
-      info_ptr->num_trans = 0;
-   }
-#endif
-
-   if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      info_ptr->channels++;
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-   /* STRIP_ALPHA and FILLER allowed:  MASK_ALPHA bit stripped above */
-   if ((png_ptr->transformations & PNG_FILLER) != 0 &&
-       (info_ptr->color_type == PNG_COLOR_TYPE_RGB ||
-       info_ptr->color_type == PNG_COLOR_TYPE_GRAY))
-   {
-      info_ptr->channels++;
-      /* If adding a true alpha channel not just filler */
-      if ((png_ptr->transformations & PNG_ADD_ALPHA) != 0)
-         info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
-   }
-#endif
-
-#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \
-defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
-   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
-   {
-      if (png_ptr->user_transform_depth != 0)
-         info_ptr->bit_depth = png_ptr->user_transform_depth;
-
-      if (png_ptr->user_transform_channels != 0)
-         info_ptr->channels = png_ptr->user_transform_channels;
-   }
-#endif
-
-   info_ptr->pixel_depth = (png_byte)(info_ptr->channels *
-       info_ptr->bit_depth);
-
-   info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, info_ptr->width);
-
-   /* Adding in 1.5.4: cache the above value in png_struct so that we can later
-    * check in png_rowbytes that the user buffer won't get overwritten.  Note
-    * that the field is not always set - if png_read_update_info isn't called
-    * the application has to either not do any transforms or get the calculation
-    * right itself.
-    */
-   png_ptr->info_rowbytes = info_ptr->rowbytes;
-
-#ifndef PNG_READ_EXPAND_SUPPORTED
-   if (png_ptr != NULL)
-      return;
-#endif
-}
-
-#ifdef PNG_READ_PACK_SUPPORTED
-/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel,
- * without changing the actual values.  Thus, if you had a row with
- * a bit depth of 1, you would end up with bytes that only contained
- * the numbers 0 or 1.  If you would rather they contain 0 and 255, use
- * png_do_shift() after this.
- */
-static void
-png_do_unpack(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_unpack");
-
-   if (row_info->bit_depth < 8)
-   {
-      png_uint_32 i;
-      png_uint_32 row_width=row_info->width;
-
-      switch (row_info->bit_depth)
-      {
-         case 1:
-         {
-            png_bytep sp = row + (size_t)((row_width - 1) >> 3);
-            png_bytep dp = row + (size_t)row_width - 1;
-            png_uint_32 shift = 7U - ((row_width + 7U) & 0x07);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x01);
-
-               if (shift == 7)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift++;
-
-               dp--;
-            }
-            break;
-         }
-
-         case 2:
-         {
-
-            png_bytep sp = row + (size_t)((row_width - 1) >> 2);
-            png_bytep dp = row + (size_t)row_width - 1;
-            png_uint_32 shift = ((3U - ((row_width + 3U) & 0x03)) << 1);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x03);
-
-               if (shift == 6)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift += 2;
-
-               dp--;
-            }
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp = row + (size_t)((row_width - 1) >> 1);
-            png_bytep dp = row + (size_t)row_width - 1;
-            png_uint_32 shift = ((1U - ((row_width + 1U) & 0x01)) << 2);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x0f);
-
-               if (shift == 4)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift = 4;
-
-               dp--;
-            }
-            break;
-         }
-
-         default:
-            break;
-      }
-      row_info->bit_depth = 8;
-      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
-      row_info->rowbytes = row_width * row_info->channels;
-   }
-}
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-/* Reverse the effects of png_do_shift.  This routine merely shifts the
- * pixels back to their significant bits values.  Thus, if you have
- * a row of bit depth 8, but only 5 are significant, this will shift
- * the values back to 0 through 31.
- */
-static void
-png_do_unshift(png_row_infop row_info, png_bytep row,
-    png_const_color_8p sig_bits)
-{
-   int color_type;
-
-   png_debug(1, "in png_do_unshift");
-
-   /* The palette case has already been handled in the _init routine. */
-   color_type = row_info->color_type;
-
-   if (color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      int shift[4];
-      int channels = 0;
-      int bit_depth = row_info->bit_depth;
-
-      if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
-      {
-         shift[channels++] = bit_depth - sig_bits->red;
-         shift[channels++] = bit_depth - sig_bits->green;
-         shift[channels++] = bit_depth - sig_bits->blue;
-      }
-
-      else
-      {
-         shift[channels++] = bit_depth - sig_bits->gray;
-      }
-
-      if ((color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      {
-         shift[channels++] = bit_depth - sig_bits->alpha;
-      }
-
-      {
-         int c, have_shift;
-
-         for (c = have_shift = 0; c < channels; ++c)
-         {
-            /* A shift of more than the bit depth is an error condition but it
-             * gets ignored here.
-             */
-            if (shift[c] <= 0 || shift[c] >= bit_depth)
-               shift[c] = 0;
-
-            else
-               have_shift = 1;
-         }
-
-         if (have_shift == 0)
-            return;
-      }
-
-      switch (bit_depth)
-      {
-         default:
-         /* Must be 1bpp gray: should not be here! */
-            /* NOTREACHED */
-            break;
-
-         case 2:
-         /* Must be 2bpp gray */
-         /* assert(channels == 1 && shift[0] == 1) */
-         {
-            png_bytep bp = row;
-            png_bytep bp_end = bp + row_info->rowbytes;
-
-            while (bp < bp_end)
-            {
-               int b = (*bp >> 1) & 0x55;
-               *bp++ = (png_byte)b;
-            }
-            break;
-         }
-
-         case 4:
-         /* Must be 4bpp gray */
-         /* assert(channels == 1) */
-         {
-            png_bytep bp = row;
-            png_bytep bp_end = bp + row_info->rowbytes;
-            int gray_shift = shift[0];
-            int mask =  0xf >> gray_shift;
-
-            mask |= mask << 4;
-
-            while (bp < bp_end)
-            {
-               int b = (*bp >> gray_shift) & mask;
-               *bp++ = (png_byte)b;
-            }
-            break;
-         }
-
-         case 8:
-         /* Single byte components, G, GA, RGB, RGBA */
-         {
-            png_bytep bp = row;
-            png_bytep bp_end = bp + row_info->rowbytes;
-            int channel = 0;
-
-            while (bp < bp_end)
-            {
-               int b = *bp >> shift[channel];
-               if (++channel >= channels)
-                  channel = 0;
-               *bp++ = (png_byte)b;
-            }
-            break;
-         }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-         case 16:
-         /* Double byte components, G, GA, RGB, RGBA */
-         {
-            png_bytep bp = row;
-            png_bytep bp_end = bp + row_info->rowbytes;
-            int channel = 0;
-
-            while (bp < bp_end)
-            {
-               int value = (bp[0] << 8) + bp[1];
-
-               value >>= shift[channel];
-               if (++channel >= channels)
-                  channel = 0;
-               *bp++ = (png_byte)(value >> 8);
-               *bp++ = (png_byte)value;
-            }
-            break;
-         }
-#endif
-      }
-   }
-}
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-/* Scale rows of bit depth 16 down to 8 accurately */
-static void
-png_do_scale_16_to_8(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_scale_16_to_8");
-
-   if (row_info->bit_depth == 16)
-   {
-      png_bytep sp = row; /* source */
-      png_bytep dp = row; /* destination */
-      png_bytep ep = sp + row_info->rowbytes; /* end+1 */
-
-      while (sp < ep)
-      {
-         /* The input is an array of 16-bit components, these must be scaled to
-          * 8 bits each.  For a 16-bit value V the required value (from the PNG
-          * specification) is:
-          *
-          *    (V * 255) / 65535
-          *
-          * This reduces to round(V / 257), or floor((V + 128.5)/257)
-          *
-          * Represent V as the two byte value vhi.vlo.  Make a guess that the
-          * result is the top byte of V, vhi, then the correction to this value
-          * is:
-          *
-          *    error = floor(((V-vhi.vhi) + 128.5) / 257)
-          *          = floor(((vlo-vhi) + 128.5) / 257)
-          *
-          * This can be approximated using integer arithmetic (and a signed
-          * shift):
-          *
-          *    error = (vlo-vhi+128) >> 8;
-          *
-          * The approximate differs from the exact answer only when (vlo-vhi) is
-          * 128; it then gives a correction of +1 when the exact correction is
-          * 0.  This gives 128 errors.  The exact answer (correct for all 16-bit
-          * input values) is:
-          *
-          *    error = (vlo-vhi+128)*65535 >> 24;
-          *
-          * An alternative arithmetic calculation which also gives no errors is:
-          *
-          *    (V * 255 + 32895) >> 16
-          */
-
-         png_int_32 tmp = *sp++; /* must be signed! */
-         tmp += (((int)*sp++ - tmp + 128) * 65535) >> 24;
-         *dp++ = (png_byte)tmp;
-      }
-
-      row_info->bit_depth = 8;
-      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
-      row_info->rowbytes = row_info->width * row_info->channels;
-   }
-}
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-static void
-/* Simply discard the low byte.  This was the default behavior prior
- * to libpng-1.5.4.
- */
-png_do_chop(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_chop");
-
-   if (row_info->bit_depth == 16)
-   {
-      png_bytep sp = row; /* source */
-      png_bytep dp = row; /* destination */
-      png_bytep ep = sp + row_info->rowbytes; /* end+1 */
-
-      while (sp < ep)
-      {
-         *dp++ = *sp;
-         sp += 2; /* skip low byte */
-      }
-
-      row_info->bit_depth = 8;
-      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
-      row_info->rowbytes = row_info->width * row_info->channels;
-   }
-}
-#endif
-
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-static void
-png_do_read_swap_alpha(png_row_infop row_info, png_bytep row)
-{
-   png_uint_32 row_width = row_info->width;
-
-   png_debug(1, "in png_do_read_swap_alpha");
-
-   if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-   {
-      /* This converts from RGBA to ARGB */
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_byte save;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            save = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = save;
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      /* This converts from RRGGBBAA to AARRGGBB */
-      else
-      {
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_byte save[2];
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            save[0] = *(--sp);
-            save[1] = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = save[0];
-            *(--dp) = save[1];
-         }
-      }
-#endif
-   }
-
-   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-   {
-      /* This converts from GA to AG */
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_byte save;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            save = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = save;
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      /* This converts from GGAA to AAGG */
-      else
-      {
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_byte save[2];
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            save[0] = *(--sp);
-            save[1] = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = save[0];
-            *(--dp) = save[1];
-         }
-      }
-#endif
-   }
-}
-#endif
-
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-static void
-png_do_read_invert_alpha(png_row_infop row_info, png_bytep row)
-{
-   png_uint_32 row_width;
-   png_debug(1, "in png_do_read_invert_alpha");
-
-   row_width = row_info->width;
-   if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         /* This inverts the alpha channel in RGBA */
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            *(--dp) = (png_byte)(255 - *(--sp));
-
-/*          This does nothing:
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            We can replace it with:
-*/
-            sp-=3;
-            dp=sp;
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      /* This inverts the alpha channel in RRGGBBAA */
-      else
-      {
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            *(--dp) = (png_byte)(255 - *(--sp));
-            *(--dp) = (png_byte)(255 - *(--sp));
-
-/*          This does nothing:
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-            We can replace it with:
-*/
-            sp-=6;
-            dp=sp;
-         }
-      }
-#endif
-   }
-   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         /* This inverts the alpha channel in GA */
-         png_bytep sp = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            *(--dp) = (png_byte)(255 - *(--sp));
-            *(--dp) = *(--sp);
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      else
-      {
-         /* This inverts the alpha channel in GGAA */
-         png_bytep sp  = row + row_info->rowbytes;
-         png_bytep dp = sp;
-         png_uint_32 i;
-
-         for (i = 0; i < row_width; i++)
-         {
-            *(--dp) = (png_byte)(255 - *(--sp));
-            *(--dp) = (png_byte)(255 - *(--sp));
-/*
-            *(--dp) = *(--sp);
-            *(--dp) = *(--sp);
-*/
-            sp-=2;
-            dp=sp;
-         }
-      }
-#endif
-   }
-}
-#endif
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-/* Add filler channel if we have RGB color */
-static void
-png_do_read_filler(png_row_infop row_info, png_bytep row,
-    png_uint_32 filler, png_uint_32 flags)
-{
-   png_uint_32 i;
-   png_uint_32 row_width = row_info->width;
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-   png_byte hi_filler = (png_byte)(filler>>8);
-#endif
-   png_byte lo_filler = (png_byte)filler;
-
-   png_debug(1, "in png_do_read_filler");
-
-   if (
-       row_info->color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
-         {
-            /* This changes the data from G to GX */
-            png_bytep sp = row + (size_t)row_width;
-            png_bytep dp =  sp + (size_t)row_width;
-            for (i = 1; i < row_width; i++)
-            {
-               *(--dp) = lo_filler;
-               *(--dp) = *(--sp);
-            }
-            *(--dp) = lo_filler;
-            row_info->channels = 2;
-            row_info->pixel_depth = 16;
-            row_info->rowbytes = row_width * 2;
-         }
-
-         else
-         {
-            /* This changes the data from G to XG */
-            png_bytep sp = row + (size_t)row_width;
-            png_bytep dp = sp  + (size_t)row_width;
-            for (i = 0; i < row_width; i++)
-            {
-               *(--dp) = *(--sp);
-               *(--dp) = lo_filler;
-            }
-            row_info->channels = 2;
-            row_info->pixel_depth = 16;
-            row_info->rowbytes = row_width * 2;
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      else if (row_info->bit_depth == 16)
-      {
-         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
-         {
-            /* This changes the data from GG to GGXX */
-            png_bytep sp = row + (size_t)row_width * 2;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 1; i < row_width; i++)
-            {
-               *(--dp) = lo_filler;
-               *(--dp) = hi_filler;
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-            }
-            *(--dp) = lo_filler;
-            *(--dp) = hi_filler;
-            row_info->channels = 2;
-            row_info->pixel_depth = 32;
-            row_info->rowbytes = row_width * 4;
-         }
-
-         else
-         {
-            /* This changes the data from GG to XXGG */
-            png_bytep sp = row + (size_t)row_width * 2;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 0; i < row_width; i++)
-            {
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = lo_filler;
-               *(--dp) = hi_filler;
-            }
-            row_info->channels = 2;
-            row_info->pixel_depth = 32;
-            row_info->rowbytes = row_width * 4;
-         }
-      }
-#endif
-   } /* COLOR_TYPE == GRAY */
-   else if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
-         {
-            /* This changes the data from RGB to RGBX */
-            png_bytep sp = row + (size_t)row_width * 3;
-            png_bytep dp = sp  + (size_t)row_width;
-            for (i = 1; i < row_width; i++)
-            {
-               *(--dp) = lo_filler;
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-            }
-            *(--dp) = lo_filler;
-            row_info->channels = 4;
-            row_info->pixel_depth = 32;
-            row_info->rowbytes = row_width * 4;
-         }
-
-         else
-         {
-            /* This changes the data from RGB to XRGB */
-            png_bytep sp = row + (size_t)row_width * 3;
-            png_bytep dp = sp + (size_t)row_width;
-            for (i = 0; i < row_width; i++)
-            {
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = lo_filler;
-            }
-            row_info->channels = 4;
-            row_info->pixel_depth = 32;
-            row_info->rowbytes = row_width * 4;
-         }
-      }
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-      else if (row_info->bit_depth == 16)
-      {
-         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
-         {
-            /* This changes the data from RRGGBB to RRGGBBXX */
-            png_bytep sp = row + (size_t)row_width * 6;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 1; i < row_width; i++)
-            {
-               *(--dp) = lo_filler;
-               *(--dp) = hi_filler;
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-            }
-            *(--dp) = lo_filler;
-            *(--dp) = hi_filler;
-            row_info->channels = 4;
-            row_info->pixel_depth = 64;
-            row_info->rowbytes = row_width * 8;
-         }
-
-         else
-         {
-            /* This changes the data from RRGGBB to XXRRGGBB */
-            png_bytep sp = row + (size_t)row_width * 6;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 0; i < row_width; i++)
-            {
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = lo_filler;
-               *(--dp) = hi_filler;
-            }
-
-            row_info->channels = 4;
-            row_info->pixel_depth = 64;
-            row_info->rowbytes = row_width * 8;
-         }
-      }
-#endif
-   } /* COLOR_TYPE == RGB */
-}
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-/* Expand grayscale files to RGB, with or without alpha */
-static void
-png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
-{
-   png_uint_32 i;
-   png_uint_32 row_width = row_info->width;
-
-   png_debug(1, "in png_do_gray_to_rgb");
-
-   if (row_info->bit_depth >= 8 &&
-       (row_info->color_type & PNG_COLOR_MASK_COLOR) == 0)
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This changes G to RGB */
-            png_bytep sp = row + (size_t)row_width - 1;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 0; i < row_width; i++)
-            {
-               *(dp--) = *sp;
-               *(dp--) = *sp;
-               *(dp--) = *(sp--);
-            }
-         }
-
-         else
-         {
-            /* This changes GG to RRGGBB */
-            png_bytep sp = row + (size_t)row_width * 2 - 1;
-            png_bytep dp = sp  + (size_t)row_width * 4;
-            for (i = 0; i < row_width; i++)
-            {
-               *(dp--) = *sp;
-               *(dp--) = *(sp - 1);
-               *(dp--) = *sp;
-               *(dp--) = *(sp - 1);
-               *(dp--) = *(sp--);
-               *(dp--) = *(sp--);
-            }
-         }
-      }
-
-      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This changes GA to RGBA */
-            png_bytep sp = row + (size_t)row_width * 2 - 1;
-            png_bytep dp = sp  + (size_t)row_width * 2;
-            for (i = 0; i < row_width; i++)
-            {
-               *(dp--) = *(sp--);
-               *(dp--) = *sp;
-               *(dp--) = *sp;
-               *(dp--) = *(sp--);
-            }
-         }
-
-         else
-         {
-            /* This changes GGAA to RRGGBBAA */
-            png_bytep sp = row + (size_t)row_width * 4 - 1;
-            png_bytep dp = sp  + (size_t)row_width * 4;
-            for (i = 0; i < row_width; i++)
-            {
-               *(dp--) = *(sp--);
-               *(dp--) = *(sp--);
-               *(dp--) = *sp;
-               *(dp--) = *(sp - 1);
-               *(dp--) = *sp;
-               *(dp--) = *(sp - 1);
-               *(dp--) = *(sp--);
-               *(dp--) = *(sp--);
-            }
-         }
-      }
-      row_info->channels = (png_byte)(row_info->channels + 2);
-      row_info->color_type |= PNG_COLOR_MASK_COLOR;
-      row_info->pixel_depth = (png_byte)(row_info->channels *
-          row_info->bit_depth);
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
-   }
-}
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-/* Reduce RGB files to grayscale, with or without alpha
- * using the equation given in Poynton's ColorFAQ of 1998-01-04 at
- * <http://www.inforamp.net/~poynton/>  (THIS LINK IS DEAD June 2008 but
- * versions dated 1998 through November 2002 have been archived at
- * https://web.archive.org/web/20000816232553/www.inforamp.net/
- * ~poynton/notes/colour_and_gamma/ColorFAQ.txt )
- * Charles Poynton poynton at poynton.com
- *
- *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
- *
- *  which can be expressed with integers as
- *
- *     Y = (6969 * R + 23434 * G + 2365 * B)/32768
- *
- * Poynton's current link (as of January 2003 through July 2011):
- * <http://www.poynton.com/notes/colour_and_gamma/>
- * has changed the numbers slightly:
- *
- *     Y = 0.2126*R + 0.7152*G + 0.0722*B
- *
- *  which can be expressed with integers as
- *
- *     Y = (6966 * R + 23436 * G + 2366 * B)/32768
- *
- *  Historically, however, libpng uses numbers derived from the ITU-R Rec 709
- *  end point chromaticities and the D65 white point.  Depending on the
- *  precision used for the D65 white point this produces a variety of different
- *  numbers, however if the four decimal place value used in ITU-R Rec 709 is
- *  used (0.3127,0.3290) the Y calculation would be:
- *
- *     Y = (6968 * R + 23435 * G + 2366 * B)/32768
- *
- *  While this is correct the rounding results in an overflow for white, because
- *  the sum of the rounded coefficients is 32769, not 32768.  Consequently
- *  libpng uses, instead, the closest non-overflowing approximation:
- *
- *     Y = (6968 * R + 23434 * G + 2366 * B)/32768
- *
- *  Starting with libpng-1.5.5, if the image being converted has a cHRM chunk
- *  (including an sRGB chunk) then the chromaticities are used to calculate the
- *  coefficients.  See the chunk handling in pngrutil.c for more information.
- *
- *  In all cases the calculation is to be done in a linear colorspace.  If no
- *  gamma information is available to correct the encoding of the original RGB
- *  values this results in an implicit assumption that the original PNG RGB
- *  values were linear.
- *
- *  Other integer coefficients can be used via png_set_rgb_to_gray().  Because
- *  the API takes just red and green coefficients the blue coefficient is
- *  calculated to make the sum 32768.  This will result in different rounding
- *  to that used above.
- */
-static int
-png_do_rgb_to_gray(png_structrp png_ptr, png_row_infop row_info, png_bytep row)
-{
-   int rgb_error = 0;
-
-   png_debug(1, "in png_do_rgb_to_gray");
-
-   if ((row_info->color_type & PNG_COLOR_MASK_PALETTE) == 0 &&
-       (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff;
-      png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff;
-      png_uint_32 bc = 32768 - rc - gc;
-      png_uint_32 row_width = row_info->width;
-      int have_alpha = (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0;
-
-      if (row_info->bit_depth == 8)
-      {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-         /* Notice that gamma to/from 1 are not necessarily inverses (if
-          * there is an overall gamma correction).  Prior to 1.5.5 this code
-          * checked the linearized values for equality; this doesn't match
-          * the documentation, the original values must be checked.
-          */
-         if (png_ptr->gamma_from_1 != NULL && png_ptr->gamma_to_1 != NULL)
-         {
-            png_bytep sp = row;
-            png_bytep dp = row;
-            png_uint_32 i;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_byte red   = *(sp++);
-               png_byte green = *(sp++);
-               png_byte blue  = *(sp++);
-
-               if (red != green || red != blue)
-               {
-                  red = png_ptr->gamma_to_1[red];
-                  green = png_ptr->gamma_to_1[green];
-                  blue = png_ptr->gamma_to_1[blue];
-
-                  rgb_error |= 1;
-                  *(dp++) = png_ptr->gamma_from_1[
-                      (rc*red + gc*green + bc*blue + 16384)>>15];
-               }
-
-               else
-               {
-                  /* If there is no overall correction the table will not be
-                   * set.
-                   */
-                  if (png_ptr->gamma_table != NULL)
-                     red = png_ptr->gamma_table[red];
-
-                  *(dp++) = red;
-               }
-
-               if (have_alpha != 0)
-                  *(dp++) = *(sp++);
-            }
-         }
-         else
-#endif
-         {
-            png_bytep sp = row;
-            png_bytep dp = row;
-            png_uint_32 i;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_byte red   = *(sp++);
-               png_byte green = *(sp++);
-               png_byte blue  = *(sp++);
-
-               if (red != green || red != blue)
-               {
-                  rgb_error |= 1;
-                  /* NOTE: this is the historical approach which simply
-                   * truncates the results.
-                   */
-                  *(dp++) = (png_byte)((rc*red + gc*green + bc*blue)>>15);
-               }
-
-               else
-                  *(dp++) = red;
-
-               if (have_alpha != 0)
-                  *(dp++) = *(sp++);
-            }
-         }
-      }
-
-      else /* RGB bit_depth == 16 */
-      {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-         if (png_ptr->gamma_16_to_1 != NULL && png_ptr->gamma_16_from_1 != NULL)
-         {
-            png_bytep sp = row;
-            png_bytep dp = row;
-            png_uint_32 i;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_uint_16 red, green, blue, w;
-               png_byte hi,lo;
-
-               hi=*(sp)++; lo=*(sp)++; red   = (png_uint_16)((hi << 8) | (lo));
-               hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo));
-               hi=*(sp)++; lo=*(sp)++; blue  = (png_uint_16)((hi << 8) | (lo));
-
-               if (red == green && red == blue)
-               {
-                  if (png_ptr->gamma_16_table != NULL)
-                     w = png_ptr->gamma_16_table[(red & 0xff)
-                         >> png_ptr->gamma_shift][red >> 8];
-
-                  else
-                     w = red;
-               }
-
-               else
-               {
-                  png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red & 0xff)
-                      >> png_ptr->gamma_shift][red>>8];
-                  png_uint_16 green_1 =
-                      png_ptr->gamma_16_to_1[(green & 0xff) >>
-                      png_ptr->gamma_shift][green>>8];
-                  png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue & 0xff)
-                      >> png_ptr->gamma_shift][blue>>8];
-                  png_uint_16 gray16  = (png_uint_16)((rc*red_1 + gc*green_1
-                      + bc*blue_1 + 16384)>>15);
-                  w = png_ptr->gamma_16_from_1[(gray16 & 0xff) >>
-                      png_ptr->gamma_shift][gray16 >> 8];
-                  rgb_error |= 1;
-               }
-
-               *(dp++) = (png_byte)((w>>8) & 0xff);
-               *(dp++) = (png_byte)(w & 0xff);
-
-               if (have_alpha != 0)
-               {
-                  *(dp++) = *(sp++);
-                  *(dp++) = *(sp++);
-               }
-            }
-         }
-         else
-#endif
-         {
-            png_bytep sp = row;
-            png_bytep dp = row;
-            png_uint_32 i;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_uint_16 red, green, blue, gray16;
-               png_byte hi,lo;
-
-               hi=*(sp)++; lo=*(sp)++; red   = (png_uint_16)((hi << 8) | (lo));
-               hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo));
-               hi=*(sp)++; lo=*(sp)++; blue  = (png_uint_16)((hi << 8) | (lo));
-
-               if (red != green || red != blue)
-                  rgb_error |= 1;
-
-               /* From 1.5.5 in the 16-bit case do the accurate conversion even
-                * in the 'fast' case - this is because this is where the code
-                * ends up when handling linear 16-bit data.
-                */
-               gray16  = (png_uint_16)((rc*red + gc*green + bc*blue + 16384) >>
-                  15);
-               *(dp++) = (png_byte)((gray16 >> 8) & 0xff);
-               *(dp++) = (png_byte)(gray16 & 0xff);
-
-               if (have_alpha != 0)
-               {
-                  *(dp++) = *(sp++);
-                  *(dp++) = *(sp++);
-               }
-            }
-         }
-      }
-
-      row_info->channels = (png_byte)(row_info->channels - 2);
-      row_info->color_type = (png_byte)(row_info->color_type &
-          ~PNG_COLOR_MASK_COLOR);
-      row_info->pixel_depth = (png_byte)(row_info->channels *
-          row_info->bit_depth);
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
-   }
-   return rgb_error;
-}
-#endif
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-/* Replace any alpha or transparency with the supplied background color.
- * "background" is already in the screen gamma, while "background_1" is
- * at a gamma of 1.0.  Paletted files have already been taken care of.
- */
-static void
-png_do_compose(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
-{
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   png_const_bytep gamma_table = png_ptr->gamma_table;
-   png_const_bytep gamma_from_1 = png_ptr->gamma_from_1;
-   png_const_bytep gamma_to_1 = png_ptr->gamma_to_1;
-   png_const_uint_16pp gamma_16 = png_ptr->gamma_16_table;
-   png_const_uint_16pp gamma_16_from_1 = png_ptr->gamma_16_from_1;
-   png_const_uint_16pp gamma_16_to_1 = png_ptr->gamma_16_to_1;
-   int gamma_shift = png_ptr->gamma_shift;
-   int optimize = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0;
-#endif
-
-   png_bytep sp;
-   png_uint_32 i;
-   png_uint_32 row_width = row_info->width;
-   int shift;
-
-   png_debug(1, "in png_do_compose");
-
-   switch (row_info->color_type)
-   {
-      case PNG_COLOR_TYPE_GRAY:
-      {
-         switch (row_info->bit_depth)
-         {
-            case 1:
-            {
-               sp = row;
-               shift = 7;
-               for (i = 0; i < row_width; i++)
-               {
-                  if ((png_uint_16)((*sp >> shift) & 0x01)
-                     == png_ptr->trans_color.gray)
-                  {
-                     unsigned int tmp = *sp & (0x7f7f >> (7 - shift));
-                     tmp |=
-                         (unsigned int)(png_ptr->background.gray << shift);
-                     *sp = (png_byte)(tmp & 0xff);
-                  }
-
-                  if (shift == 0)
-                  {
-                     shift = 7;
-                     sp++;
-                  }
-
-                  else
-                     shift--;
-               }
-               break;
-            }
-
-            case 2:
-            {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_table != NULL)
-               {
-                  sp = row;
-                  shift = 6;
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((png_uint_16)((*sp >> shift) & 0x03)
-                         == png_ptr->trans_color.gray)
-                     {
-                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
-                        tmp |=
-                           (unsigned int)png_ptr->background.gray << shift;
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     else
-                     {
-                        unsigned int p = (*sp >> shift) & 0x03;
-                        unsigned int g = (gamma_table [p | (p << 2) |
-                            (p << 4) | (p << 6)] >> 6) & 0x03;
-                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
-                        tmp |= (unsigned int)(g << shift);
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     if (shift == 0)
-                     {
-                        shift = 6;
-                        sp++;
-                     }
-
-                     else
-                        shift -= 2;
-                  }
-               }
-
-               else
-#endif
-               {
-                  sp = row;
-                  shift = 6;
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((png_uint_16)((*sp >> shift) & 0x03)
-                         == png_ptr->trans_color.gray)
-                     {
-                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
-                        tmp |=
-                            (unsigned int)png_ptr->background.gray << shift;
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     if (shift == 0)
-                     {
-                        shift = 6;
-                        sp++;
-                     }
-
-                     else
-                        shift -= 2;
-                  }
-               }
-               break;
-            }
-
-            case 4:
-            {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_table != NULL)
-               {
-                  sp = row;
-                  shift = 4;
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((png_uint_16)((*sp >> shift) & 0x0f)
-                         == png_ptr->trans_color.gray)
-                     {
-                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
-                        tmp |=
-                           (unsigned int)(png_ptr->background.gray << shift);
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     else
-                     {
-                        unsigned int p = (*sp >> shift) & 0x0f;
-                        unsigned int g = (gamma_table[p | (p << 4)] >> 4) &
-                           0x0f;
-                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
-                        tmp |= (unsigned int)(g << shift);
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     if (shift == 0)
-                     {
-                        shift = 4;
-                        sp++;
-                     }
-
-                     else
-                        shift -= 4;
-                  }
-               }
-
-               else
-#endif
-               {
-                  sp = row;
-                  shift = 4;
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((png_uint_16)((*sp >> shift) & 0x0f)
-                         == png_ptr->trans_color.gray)
-                     {
-                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
-                        tmp |=
-                           (unsigned int)(png_ptr->background.gray << shift);
-                        *sp = (png_byte)(tmp & 0xff);
-                     }
-
-                     if (shift == 0)
-                     {
-                        shift = 4;
-                        sp++;
-                     }
-
-                     else
-                        shift -= 4;
-                  }
-               }
-               break;
-            }
-
-            case 8:
-            {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_table != NULL)
-               {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp++)
-                  {
-                     if (*sp == png_ptr->trans_color.gray)
-                        *sp = (png_byte)png_ptr->background.gray;
-
-                     else
-                        *sp = gamma_table[*sp];
-                  }
-               }
-               else
-#endif
-               {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp++)
-                  {
-                     if (*sp == png_ptr->trans_color.gray)
-                        *sp = (png_byte)png_ptr->background.gray;
-                  }
-               }
-               break;
-            }
-
-            case 16:
-            {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_16 != NULL)
-               {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 2)
-                  {
-                     png_uint_16 v;
-
-                     v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-
-                     if (v == png_ptr->trans_color.gray)
-                     {
-                        /* Background is already in screen gamma */
-                        *sp = (png_byte)((png_ptr->background.gray >> 8)
-                             & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.gray
-                             & 0xff);
-                     }
-
-                     else
-                     {
-                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                        *sp = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(v & 0xff);
-                     }
-                  }
-               }
-               else
-#endif
-               {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 2)
-                  {
-                     png_uint_16 v;
-
-                     v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-
-                     if (v == png_ptr->trans_color.gray)
-                     {
-                        *sp = (png_byte)((png_ptr->background.gray >> 8)
-                             & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.gray
-                             & 0xff);
-                     }
-                  }
-               }
-               break;
-            }
-
-            default:
-               break;
-         }
-         break;
-      }
-
-      case PNG_COLOR_TYPE_RGB:
-      {
-         if (row_info->bit_depth == 8)
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_table != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 3)
-               {
-                  if (*sp == png_ptr->trans_color.red &&
-                      *(sp + 1) == png_ptr->trans_color.green &&
-                      *(sp + 2) == png_ptr->trans_color.blue)
-                  {
-                     *sp = (png_byte)png_ptr->background.red;
-                     *(sp + 1) = (png_byte)png_ptr->background.green;
-                     *(sp + 2) = (png_byte)png_ptr->background.blue;
-                  }
-
-                  else
-                  {
-                     *sp = gamma_table[*sp];
-                     *(sp + 1) = gamma_table[*(sp + 1)];
-                     *(sp + 2) = gamma_table[*(sp + 2)];
-                  }
-               }
-            }
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 3)
-               {
-                  if (*sp == png_ptr->trans_color.red &&
-                      *(sp + 1) == png_ptr->trans_color.green &&
-                      *(sp + 2) == png_ptr->trans_color.blue)
-                  {
-                     *sp = (png_byte)png_ptr->background.red;
-                     *(sp + 1) = (png_byte)png_ptr->background.green;
-                     *(sp + 2) = (png_byte)png_ptr->background.blue;
-                  }
-               }
-            }
-         }
-         else /* if (row_info->bit_depth == 16) */
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_16 != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 6)
-               {
-                  png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-
-                  png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                      + *(sp + 3));
-
-                  png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                      + *(sp + 5));
-
-                  if (r == png_ptr->trans_color.red &&
-                      g == png_ptr->trans_color.green &&
-                      b == png_ptr->trans_color.blue)
-                  {
-                     /* Background is already in screen gamma */
-                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
-                             & 0xff);
-                     *(sp + 3) = (png_byte)(png_ptr->background.green
-                             & 0xff);
-                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
-                             & 0xff);
-                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                  }
-
-                  else
-                  {
-                     png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                     *sp = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(v & 0xff);
-
-                     v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 3) = (png_byte)(v & 0xff);
-
-                     v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 5) = (png_byte)(v & 0xff);
-                  }
-               }
-            }
-
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 6)
-               {
-                  png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-
-                  png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                      + *(sp + 3));
-
-                  png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                      + *(sp + 5));
-
-                  if (r == png_ptr->trans_color.red &&
-                      g == png_ptr->trans_color.green &&
-                      b == png_ptr->trans_color.blue)
-                  {
-                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
-                             & 0xff);
-                     *(sp + 3) = (png_byte)(png_ptr->background.green
-                             & 0xff);
-                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
-                             & 0xff);
-                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                  }
-               }
-            }
-         }
-         break;
-      }
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-      {
-         if (row_info->bit_depth == 8)
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
-                gamma_table != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 2)
-               {
-                  png_uint_16 a = *(sp + 1);
-
-                  if (a == 0xff)
-                     *sp = gamma_table[*sp];
-
-                  else if (a == 0)
-                  {
-                     /* Background is already in screen gamma */
-                     *sp = (png_byte)png_ptr->background.gray;
-                  }
-
-                  else
-                  {
-                     png_byte v, w;
-
-                     v = gamma_to_1[*sp];
-                     png_composite(w, v, a, png_ptr->background_1.gray);
-                     if (optimize == 0)
-                        w = gamma_from_1[w];
-                     *sp = w;
-                  }
-               }
-            }
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 2)
-               {
-                  png_byte a = *(sp + 1);
-
-                  if (a == 0)
-                     *sp = (png_byte)png_ptr->background.gray;
-
-                  else if (a < 0xff)
-                     png_composite(*sp, *sp, a, png_ptr->background.gray);
-               }
-            }
-         }
-         else /* if (png_ptr->bit_depth == 16) */
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
-                gamma_16_to_1 != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 4)
-               {
-                  png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
-                      + *(sp + 3));
-
-                  if (a == (png_uint_16)0xffff)
-                  {
-                     png_uint_16 v;
-
-                     v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                     *sp = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(v & 0xff);
-                  }
-
-                  else if (a == 0)
-                  {
-                     /* Background is already in screen gamma */
-                     *sp = (png_byte)((png_ptr->background.gray >> 8)
-                             & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                  }
-
-                  else
-                  {
-                     png_uint_16 g, v, w;
-
-                     g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
-                     png_composite_16(v, g, a, png_ptr->background_1.gray);
-                     if (optimize != 0)
-                        w = v;
-                     else
-                        w = gamma_16_from_1[(v & 0xff) >>
-                            gamma_shift][v >> 8];
-                     *sp = (png_byte)((w >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(w & 0xff);
-                  }
-               }
-            }
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 4)
-               {
-                  png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
-                      + *(sp + 3));
-
-                  if (a == 0)
-                  {
-                     *sp = (png_byte)((png_ptr->background.gray >> 8)
-                             & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                  }
-
-                  else if (a < 0xffff)
-                  {
-                     png_uint_16 g, v;
-
-                     g = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-                     png_composite_16(v, g, a, png_ptr->background.gray);
-                     *sp = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(v & 0xff);
-                  }
-               }
-            }
-         }
-         break;
-      }
-
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-      {
-         if (row_info->bit_depth == 8)
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
-                gamma_table != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 4)
-               {
-                  png_byte a = *(sp + 3);
-
-                  if (a == 0xff)
-                  {
-                     *sp = gamma_table[*sp];
-                     *(sp + 1) = gamma_table[*(sp + 1)];
-                     *(sp + 2) = gamma_table[*(sp + 2)];
-                  }
-
-                  else if (a == 0)
-                  {
-                     /* Background is already in screen gamma */
-                     *sp = (png_byte)png_ptr->background.red;
-                     *(sp + 1) = (png_byte)png_ptr->background.green;
-                     *(sp + 2) = (png_byte)png_ptr->background.blue;
-                  }
-
-                  else
-                  {
-                     png_byte v, w;
-
-                     v = gamma_to_1[*sp];
-                     png_composite(w, v, a, png_ptr->background_1.red);
-                     if (optimize == 0) w = gamma_from_1[w];
-                     *sp = w;
-
-                     v = gamma_to_1[*(sp + 1)];
-                     png_composite(w, v, a, png_ptr->background_1.green);
-                     if (optimize == 0) w = gamma_from_1[w];
-                     *(sp + 1) = w;
-
-                     v = gamma_to_1[*(sp + 2)];
-                     png_composite(w, v, a, png_ptr->background_1.blue);
-                     if (optimize == 0) w = gamma_from_1[w];
-                     *(sp + 2) = w;
-                  }
-               }
-            }
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 4)
-               {
-                  png_byte a = *(sp + 3);
-
-                  if (a == 0)
-                  {
-                     *sp = (png_byte)png_ptr->background.red;
-                     *(sp + 1) = (png_byte)png_ptr->background.green;
-                     *(sp + 2) = (png_byte)png_ptr->background.blue;
-                  }
-
-                  else if (a < 0xff)
-                  {
-                     png_composite(*sp, *sp, a, png_ptr->background.red);
-
-                     png_composite(*(sp + 1), *(sp + 1), a,
-                         png_ptr->background.green);
-
-                     png_composite(*(sp + 2), *(sp + 2), a,
-                         png_ptr->background.blue);
-                  }
-               }
-            }
-         }
-         else /* if (row_info->bit_depth == 16) */
-         {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-            if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
-                gamma_16_to_1 != NULL)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 8)
-               {
-                  png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
-                      << 8) + (png_uint_16)(*(sp + 7)));
-
-                  if (a == (png_uint_16)0xffff)
-                  {
-                     png_uint_16 v;
-
-                     v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                     *sp = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(v & 0xff);
-
-                     v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 3) = (png_byte)(v & 0xff);
-
-                     v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 5) = (png_byte)(v & 0xff);
-                  }
-
-                  else if (a == 0)
-                  {
-                     /* Background is already in screen gamma */
-                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
-                             & 0xff);
-                     *(sp + 3) = (png_byte)(png_ptr->background.green
-                             & 0xff);
-                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
-                             & 0xff);
-                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                  }
-
-                  else
-                  {
-                     png_uint_16 v, w;
-
-                     v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
-                     png_composite_16(w, v, a, png_ptr->background_1.red);
-                     if (optimize == 0)
-                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
-                             8];
-                     *sp = (png_byte)((w >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(w & 0xff);
-
-                     v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                     png_composite_16(w, v, a, png_ptr->background_1.green);
-                     if (optimize == 0)
-                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
-                             8];
-
-                     *(sp + 2) = (png_byte)((w >> 8) & 0xff);
-                     *(sp + 3) = (png_byte)(w & 0xff);
-
-                     v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                     png_composite_16(w, v, a, png_ptr->background_1.blue);
-                     if (optimize == 0)
-                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
-                             8];
-
-                     *(sp + 4) = (png_byte)((w >> 8) & 0xff);
-                     *(sp + 5) = (png_byte)(w & 0xff);
-                  }
-               }
-            }
-
-            else
-#endif
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++, sp += 8)
-               {
-                  png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
-                      << 8) + (png_uint_16)(*(sp + 7)));
-
-                  if (a == 0)
-                  {
-                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
-                             & 0xff);
-                     *(sp + 3) = (png_byte)(png_ptr->background.green
-                             & 0xff);
-                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
-                             & 0xff);
-                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                  }
-
-                  else if (a < 0xffff)
-                  {
-                     png_uint_16 v;
-
-                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-                     png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                         + *(sp + 3));
-                     png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                         + *(sp + 5));
-
-                     png_composite_16(v, r, a, png_ptr->background.red);
-                     *sp = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 1) = (png_byte)(v & 0xff);
-
-                     png_composite_16(v, g, a, png_ptr->background.green);
-                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 3) = (png_byte)(v & 0xff);
-
-                     png_composite_16(v, b, a, png_ptr->background.blue);
-                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                     *(sp + 5) = (png_byte)(v & 0xff);
-                  }
-               }
-            }
-         }
-         break;
-      }
-
-      default:
-         break;
-   }
-}
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-/* Gamma correct the image, avoiding the alpha channel.  Make sure
- * you do this after you deal with the transparency issue on grayscale
- * or RGB images. If your bit depth is 8, use gamma_table, if it
- * is 16, use gamma_16_table and gamma_shift.  Build these with
- * build_gamma_table().
- */
-static void
-png_do_gamma(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
-{
-   png_const_bytep gamma_table = png_ptr->gamma_table;
-   png_const_uint_16pp gamma_16_table = png_ptr->gamma_16_table;
-   int gamma_shift = png_ptr->gamma_shift;
-
-   png_bytep sp;
-   png_uint_32 i;
-   png_uint_32 row_width=row_info->width;
-
-   png_debug(1, "in png_do_gamma");
-
-   if (((row_info->bit_depth <= 8 && gamma_table != NULL) ||
-       (row_info->bit_depth == 16 && gamma_16_table != NULL)))
-   {
-      switch (row_info->color_type)
-      {
-         case PNG_COLOR_TYPE_RGB:
-         {
-            if (row_info->bit_depth == 8)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  *sp = gamma_table[*sp];
-                  sp++;
-                  *sp = gamma_table[*sp];
-                  sp++;
-                  *sp = gamma_table[*sp];
-                  sp++;
-               }
-            }
-
-            else /* if (row_info->bit_depth == 16) */
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  png_uint_16 v;
-
-                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-
-                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-
-                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-               }
-            }
-            break;
-         }
-
-         case PNG_COLOR_TYPE_RGB_ALPHA:
-         {
-            if (row_info->bit_depth == 8)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  *sp = gamma_table[*sp];
-                  sp++;
-
-                  *sp = gamma_table[*sp];
-                  sp++;
-
-                  *sp = gamma_table[*sp];
-                  sp++;
-
-                  sp++;
-               }
-            }
-
-            else /* if (row_info->bit_depth == 16) */
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-
-                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-
-                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 4;
-               }
-            }
-            break;
-         }
-
-         case PNG_COLOR_TYPE_GRAY_ALPHA:
-         {
-            if (row_info->bit_depth == 8)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  *sp = gamma_table[*sp];
-                  sp += 2;
-               }
-            }
-
-            else /* if (row_info->bit_depth == 16) */
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 4;
-               }
-            }
-            break;
-         }
-
-         case PNG_COLOR_TYPE_GRAY:
-         {
-            if (row_info->bit_depth == 2)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i += 4)
-               {
-                  int a = *sp & 0xc0;
-                  int b = *sp & 0x30;
-                  int c = *sp & 0x0c;
-                  int d = *sp & 0x03;
-
-                  *sp = (png_byte)(
-                      ((((int)gamma_table[a|(a>>2)|(a>>4)|(a>>6)])   ) & 0xc0)|
-                      ((((int)gamma_table[(b<<2)|b|(b>>2)|(b>>4)])>>2) & 0x30)|
-                      ((((int)gamma_table[(c<<4)|(c<<2)|c|(c>>2)])>>4) & 0x0c)|
-                      ((((int)gamma_table[(d<<6)|(d<<4)|(d<<2)|d])>>6) ));
-                  sp++;
-               }
-            }
-
-            if (row_info->bit_depth == 4)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i += 2)
-               {
-                  int msb = *sp & 0xf0;
-                  int lsb = *sp & 0x0f;
-
-                  *sp = (png_byte)((((int)gamma_table[msb | (msb >> 4)]) & 0xf0)
-                      | (((int)gamma_table[(lsb << 4) | lsb]) >> 4));
-                  sp++;
-               }
-            }
-
-            else if (row_info->bit_depth == 8)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  *sp = gamma_table[*sp];
-                  sp++;
-               }
-            }
-
-            else if (row_info->bit_depth == 16)
-            {
-               sp = row;
-               for (i = 0; i < row_width; i++)
-               {
-                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
-                  *sp = (png_byte)((v >> 8) & 0xff);
-                  *(sp + 1) = (png_byte)(v & 0xff);
-                  sp += 2;
-               }
-            }
-            break;
-         }
-
-         default:
-            break;
-      }
-   }
-}
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-/* Encode the alpha channel to the output gamma (the input channel is always
- * linear.)  Called only with color types that have an alpha channel.  Needs the
- * from_1 tables.
- */
-static void
-png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
-{
-   png_uint_32 row_width = row_info->width;
-
-   png_debug(1, "in png_do_encode_alpha");
-
-   if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep table = png_ptr->gamma_from_1;
-
-         if (table != NULL)
-         {
-            int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 4 : 2;
-
-            /* The alpha channel is the last component: */
-            row += step - 1;
-
-            for (; row_width > 0; --row_width, row += step)
-               *row = table[*row];
-
-            return;
-         }
-      }
-
-      else if (row_info->bit_depth == 16)
-      {
-         png_uint_16pp table = png_ptr->gamma_16_from_1;
-         int gamma_shift = png_ptr->gamma_shift;
-
-         if (table != NULL)
-         {
-            int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 8 : 4;
-
-            /* The alpha channel is the last component: */
-            row += step - 2;
-
-            for (; row_width > 0; --row_width, row += step)
-            {
-               png_uint_16 v;
-
-               v = table[*(row + 1) >> gamma_shift][*row];
-               *row = (png_byte)((v >> 8) & 0xff);
-               *(row + 1) = (png_byte)(v & 0xff);
-            }
-
-            return;
-         }
-      }
-   }
-
-   /* Only get to here if called with a weird row_info; no harm has been done,
-    * so just issue a warning.
-    */
-   png_warning(png_ptr, "png_do_encode_alpha: unexpected call");
-}
-#endif
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-/* Expands a palette row to an RGB or RGBA row depending
- * upon whether you supply trans and num_trans.
- */
-static void
-png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
-    png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
-    int num_trans)
-{
-   int shift, value;
-   png_bytep sp, dp;
-   png_uint_32 i;
-   png_uint_32 row_width=row_info->width;
-
-   png_debug(1, "in png_do_expand_palette");
-
-   if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (row_info->bit_depth < 8)
-      {
-         switch (row_info->bit_depth)
-         {
-            case 1:
-            {
-               sp = row + (size_t)((row_width - 1) >> 3);
-               dp = row + (size_t)row_width - 1;
-               shift = 7 - (int)((row_width + 7) & 0x07);
-               for (i = 0; i < row_width; i++)
-               {
-                  if ((*sp >> shift) & 0x01)
-                     *dp = 1;
-
-                  else
-                     *dp = 0;
-
-                  if (shift == 7)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift++;
-
-                  dp--;
-               }
-               break;
-            }
-
-            case 2:
-            {
-               sp = row + (size_t)((row_width - 1) >> 2);
-               dp = row + (size_t)row_width - 1;
-               shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
-               for (i = 0; i < row_width; i++)
-               {
-                  value = (*sp >> shift) & 0x03;
-                  *dp = (png_byte)value;
-                  if (shift == 6)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift += 2;
-
-                  dp--;
-               }
-               break;
-            }
-
-            case 4:
-            {
-               sp = row + (size_t)((row_width - 1) >> 1);
-               dp = row + (size_t)row_width - 1;
-               shift = (int)((row_width & 0x01) << 2);
-               for (i = 0; i < row_width; i++)
-               {
-                  value = (*sp >> shift) & 0x0f;
-                  *dp = (png_byte)value;
-                  if (shift == 4)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift += 4;
-
-                  dp--;
-               }
-               break;
-            }
-
-            default:
-               break;
-         }
-         row_info->bit_depth = 8;
-         row_info->pixel_depth = 8;
-         row_info->rowbytes = row_width;
-      }
-
-      if (row_info->bit_depth == 8)
-      {
-         {
-            if (num_trans > 0)
-            {
-               sp = row + (size_t)row_width - 1;
-               dp = row + ((size_t)row_width << 2) - 1;
-
-               i = 0;
-#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
-               if (png_ptr->riffled_palette != NULL)
-               {
-                  /* The RGBA optimization works with png_ptr->bit_depth == 8
-                   * but sometimes row_info->bit_depth has been changed to 8.
-                   * In these cases, the palette hasn't been riffled.
-                   */
-                  i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row,
-                      &sp, &dp);
-               }
-#else
-               PNG_UNUSED(png_ptr)
-#endif
-
-               for (; i < row_width; i++)
-               {
-                  if ((int)(*sp) >= num_trans)
-                     *dp-- = 0xff;
-                  else
-                     *dp-- = trans_alpha[*sp];
-                  *dp-- = palette[*sp].blue;
-                  *dp-- = palette[*sp].green;
-                  *dp-- = palette[*sp].red;
-                  sp--;
-               }
-               row_info->bit_depth = 8;
-               row_info->pixel_depth = 32;
-               row_info->rowbytes = row_width * 4;
-               row_info->color_type = 6;
-               row_info->channels = 4;
-            }
-
-            else
-            {
-               sp = row + (size_t)row_width - 1;
-               dp = row + (size_t)(row_width * 3) - 1;
-               i = 0;
-#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
-               i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row,
-                   &sp, &dp);
-#else
-               PNG_UNUSED(png_ptr)
-#endif
-
-               for (; i < row_width; i++)
-               {
-                  *dp-- = palette[*sp].blue;
-                  *dp-- = palette[*sp].green;
-                  *dp-- = palette[*sp].red;
-                  sp--;
-               }
-
-               row_info->bit_depth = 8;
-               row_info->pixel_depth = 24;
-               row_info->rowbytes = row_width * 3;
-               row_info->color_type = 2;
-               row_info->channels = 3;
-            }
-         }
-      }
-   }
-}
-
-/* If the bit depth < 8, it is expanded to 8.  Also, if the already
- * expanded transparency value is supplied, an alpha channel is built.
- */
-static void
-png_do_expand(png_row_infop row_info, png_bytep row,
-    png_const_color_16p trans_color)
-{
-   int shift, value;
-   png_bytep sp, dp;
-   png_uint_32 i;
-   png_uint_32 row_width=row_info->width;
-
-   png_debug(1, "in png_do_expand");
-
-   if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      unsigned int gray = trans_color != NULL ? trans_color->gray : 0;
-
-      if (row_info->bit_depth < 8)
-      {
-         switch (row_info->bit_depth)
-         {
-            case 1:
-            {
-               gray = (gray & 0x01) * 0xff;
-               sp = row + (size_t)((row_width - 1) >> 3);
-               dp = row + (size_t)row_width - 1;
-               shift = 7 - (int)((row_width + 7) & 0x07);
-               for (i = 0; i < row_width; i++)
-               {
-                  if ((*sp >> shift) & 0x01)
-                     *dp = 0xff;
-
-                  else
-                     *dp = 0;
-
-                  if (shift == 7)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift++;
-
-                  dp--;
-               }
-               break;
-            }
-
-            case 2:
-            {
-               gray = (gray & 0x03) * 0x55;
-               sp = row + (size_t)((row_width - 1) >> 2);
-               dp = row + (size_t)row_width - 1;
-               shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
-               for (i = 0; i < row_width; i++)
-               {
-                  value = (*sp >> shift) & 0x03;
-                  *dp = (png_byte)(value | (value << 2) | (value << 4) |
-                     (value << 6));
-                  if (shift == 6)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift += 2;
-
-                  dp--;
-               }
-               break;
-            }
-
-            case 4:
-            {
-               gray = (gray & 0x0f) * 0x11;
-               sp = row + (size_t)((row_width - 1) >> 1);
-               dp = row + (size_t)row_width - 1;
-               shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
-               for (i = 0; i < row_width; i++)
-               {
-                  value = (*sp >> shift) & 0x0f;
-                  *dp = (png_byte)(value | (value << 4));
-                  if (shift == 4)
-                  {
-                     shift = 0;
-                     sp--;
-                  }
-
-                  else
-                     shift = 4;
-
-                  dp--;
-               }
-               break;
-            }
-
-            default:
-               break;
-         }
-
-         row_info->bit_depth = 8;
-         row_info->pixel_depth = 8;
-         row_info->rowbytes = row_width;
-      }
-
-      if (trans_color != NULL)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            gray = gray & 0xff;
-            sp = row + (size_t)row_width - 1;
-            dp = row + ((size_t)row_width << 1) - 1;
-
-            for (i = 0; i < row_width; i++)
-            {
-               if ((*sp & 0xffU) == gray)
-                  *dp-- = 0;
-
-               else
-                  *dp-- = 0xff;
-
-               *dp-- = *sp--;
-            }
-         }
-
-         else if (row_info->bit_depth == 16)
-         {
-            unsigned int gray_high = (gray >> 8) & 0xff;
-            unsigned int gray_low = gray & 0xff;
-            sp = row + row_info->rowbytes - 1;
-            dp = row + (row_info->rowbytes << 1) - 1;
-            for (i = 0; i < row_width; i++)
-            {
-               if ((*(sp - 1) & 0xffU) == gray_high &&
-                   (*(sp) & 0xffU) == gray_low)
-               {
-                  *dp-- = 0;
-                  *dp-- = 0;
-               }
-
-               else
-               {
-                  *dp-- = 0xff;
-                  *dp-- = 0xff;
-               }
-
-               *dp-- = *sp--;
-               *dp-- = *sp--;
-            }
-         }
-
-         row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
-         row_info->channels = 2;
-         row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1);
-         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
-             row_width);
-      }
-   }
-   else if (row_info->color_type == PNG_COLOR_TYPE_RGB &&
-       trans_color != NULL)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         png_byte red = (png_byte)(trans_color->red & 0xff);
-         png_byte green = (png_byte)(trans_color->green & 0xff);
-         png_byte blue = (png_byte)(trans_color->blue & 0xff);
-         sp = row + (size_t)row_info->rowbytes - 1;
-         dp = row + ((size_t)row_width << 2) - 1;
-         for (i = 0; i < row_width; i++)
-         {
-            if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)
-               *dp-- = 0;
-
-            else
-               *dp-- = 0xff;
-
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-         }
-      }
-      else if (row_info->bit_depth == 16)
-      {
-         png_byte red_high = (png_byte)((trans_color->red >> 8) & 0xff);
-         png_byte green_high = (png_byte)((trans_color->green >> 8) & 0xff);
-         png_byte blue_high = (png_byte)((trans_color->blue >> 8) & 0xff);
-         png_byte red_low = (png_byte)(trans_color->red & 0xff);
-         png_byte green_low = (png_byte)(trans_color->green & 0xff);
-         png_byte blue_low = (png_byte)(trans_color->blue & 0xff);
-         sp = row + row_info->rowbytes - 1;
-         dp = row + ((size_t)row_width << 3) - 1;
-         for (i = 0; i < row_width; i++)
-         {
-            if (*(sp - 5) == red_high &&
-                *(sp - 4) == red_low &&
-                *(sp - 3) == green_high &&
-                *(sp - 2) == green_low &&
-                *(sp - 1) == blue_high &&
-                *(sp    ) == blue_low)
-            {
-               *dp-- = 0;
-               *dp-- = 0;
-            }
-
-            else
-            {
-               *dp-- = 0xff;
-               *dp-- = 0xff;
-            }
-
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-            *dp-- = *sp--;
-         }
-      }
-      row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
-      row_info->channels = 4;
-      row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2);
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
-   }
-}
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-/* If the bit depth is 8 and the color type is not a palette type expand the
- * whole row to 16 bits.  Has no effect otherwise.
- */
-static void
-png_do_expand_16(png_row_infop row_info, png_bytep row)
-{
-   if (row_info->bit_depth == 8 &&
-      row_info->color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      /* The row have a sequence of bytes containing [0..255] and we need
-       * to turn it into another row containing [0..65535], to do this we
-       * calculate:
-       *
-       *  (input / 255) * 65535
-       *
-       *  Which happens to be exactly input * 257 and this can be achieved
-       *  simply by byte replication in place (copying backwards).
-       */
-      png_byte *sp = row + row_info->rowbytes; /* source, last byte + 1 */
-      png_byte *dp = sp + row_info->rowbytes;  /* destination, end + 1 */
-      while (dp > sp)
-      {
-         dp[-2] = dp[-1] = *--sp; dp -= 2;
-      }
-
-      row_info->rowbytes *= 2;
-      row_info->bit_depth = 16;
-      row_info->pixel_depth = (png_byte)(row_info->channels * 16);
-   }
-}
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-static void
-png_do_quantize(png_row_infop row_info, png_bytep row,
-    png_const_bytep palette_lookup, png_const_bytep quantize_lookup)
-{
-   png_bytep sp, dp;
-   png_uint_32 i;
-   png_uint_32 row_width=row_info->width;
-
-   png_debug(1, "in png_do_quantize");
-
-   if (row_info->bit_depth == 8)
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_RGB && palette_lookup)
-      {
-         int r, g, b, p;
-         sp = row;
-         dp = row;
-         for (i = 0; i < row_width; i++)
-         {
-            r = *sp++;
-            g = *sp++;
-            b = *sp++;
-
-            /* This looks real messy, but the compiler will reduce
-             * it down to a reasonable formula.  For example, with
-             * 5 bits per color, we get:
-             * p = (((r >> 3) & 0x1f) << 10) |
-             *    (((g >> 3) & 0x1f) << 5) |
-             *    ((b >> 3) & 0x1f);
-             */
-            p = (((r >> (8 - PNG_QUANTIZE_RED_BITS)) &
-                ((1 << PNG_QUANTIZE_RED_BITS) - 1)) <<
-                (PNG_QUANTIZE_GREEN_BITS + PNG_QUANTIZE_BLUE_BITS)) |
-                (((g >> (8 - PNG_QUANTIZE_GREEN_BITS)) &
-                ((1 << PNG_QUANTIZE_GREEN_BITS) - 1)) <<
-                (PNG_QUANTIZE_BLUE_BITS)) |
-                ((b >> (8 - PNG_QUANTIZE_BLUE_BITS)) &
-                ((1 << PNG_QUANTIZE_BLUE_BITS) - 1));
-
-            *dp++ = palette_lookup[p];
-         }
-
-         row_info->color_type = PNG_COLOR_TYPE_PALETTE;
-         row_info->channels = 1;
-         row_info->pixel_depth = row_info->bit_depth;
-         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
-      }
-
-      else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
-         palette_lookup != NULL)
-      {
-         int r, g, b, p;
-         sp = row;
-         dp = row;
-         for (i = 0; i < row_width; i++)
-         {
-            r = *sp++;
-            g = *sp++;
-            b = *sp++;
-            sp++;
-
-            p = (((r >> (8 - PNG_QUANTIZE_RED_BITS)) &
-                ((1 << PNG_QUANTIZE_RED_BITS) - 1)) <<
-                (PNG_QUANTIZE_GREEN_BITS + PNG_QUANTIZE_BLUE_BITS)) |
-                (((g >> (8 - PNG_QUANTIZE_GREEN_BITS)) &
-                ((1 << PNG_QUANTIZE_GREEN_BITS) - 1)) <<
-                (PNG_QUANTIZE_BLUE_BITS)) |
-                ((b >> (8 - PNG_QUANTIZE_BLUE_BITS)) &
-                ((1 << PNG_QUANTIZE_BLUE_BITS) - 1));
-
-            *dp++ = palette_lookup[p];
-         }
-
-         row_info->color_type = PNG_COLOR_TYPE_PALETTE;
-         row_info->channels = 1;
-         row_info->pixel_depth = row_info->bit_depth;
-         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
-      }
-
-      else if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
-         quantize_lookup)
-      {
-         sp = row;
-
-         for (i = 0; i < row_width; i++, sp++)
-         {
-            *sp = quantize_lookup[*sp];
-         }
-      }
-   }
-}
-#endif /* READ_QUANTIZE */
-
-/* Transform the row.  The order of transformations is significant,
- * and is very touchy.  If you add a transformation, take care to
- * decide how it fits in with the other transformations here.
- */
-void /* PRIVATE */
-png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
-{
-   png_debug(1, "in png_do_read_transformations");
-
-   if (png_ptr->row_buf == NULL)
-   {
-      /* Prior to 1.5.4 this output row/pass where the NULL pointer is, but this
-       * error is incredibly rare and incredibly easy to debug without this
-       * information.
-       */
-      png_error(png_ptr, "NULL row buffer");
-   }
-
-   /* The following is debugging; prior to 1.5.4 the code was never compiled in;
-    * in 1.5.4 PNG_FLAG_DETECT_UNINITIALIZED was added and the macro
-    * PNG_WARN_UNINITIALIZED_ROW removed.  In 1.6 the new flag is set only for
-    * all transformations, however in practice the ROW_INIT always gets done on
-    * demand, if necessary.
-    */
-   if ((png_ptr->flags & PNG_FLAG_DETECT_UNINITIALIZED) != 0 &&
-       (png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
-   {
-      /* Application has failed to call either png_read_start_image() or
-       * png_read_update_info() after setting transforms that expand pixels.
-       * This check added to libpng-1.2.19 (but not enabled until 1.5.4).
-       */
-      png_error(png_ptr, "Uninitialized row");
-   }
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   if ((png_ptr->transformations & PNG_EXPAND) != 0)
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
-         if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8))
-         {
-            if (png_ptr->riffled_palette == NULL)
-            {
-               /* Initialize the accelerated palette expansion. */
-               png_ptr->riffled_palette =
-                   (png_bytep)png_malloc(png_ptr, 256 * 4);
-               png_riffle_palette_neon(png_ptr);
-            }
-         }
-#endif
-         png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
-             png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
-      }
-
-      else
-      {
-         if (png_ptr->num_trans != 0 &&
-             (png_ptr->transformations & PNG_EXPAND_tRNS) != 0)
-            png_do_expand(row_info, png_ptr->row_buf + 1,
-                &(png_ptr->trans_color));
-
-         else
-            png_do_expand(row_info, png_ptr->row_buf + 1, NULL);
-      }
-   }
-#endif
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
-       (png_ptr->transformations & PNG_COMPOSE) == 0 &&
-       (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-       row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-          0 /* at_start == false, because SWAP_ALPHA happens later */);
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
-   {
-      int rgb_error =
-          png_do_rgb_to_gray(png_ptr, row_info,
-              png_ptr->row_buf + 1);
-
-      if (rgb_error != 0)
-      {
-         png_ptr->rgb_to_gray_status=1;
-         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
-             PNG_RGB_TO_GRAY_WARN)
-            png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
-
-         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
-             PNG_RGB_TO_GRAY_ERR)
-            png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
-      }
-   }
-#endif
-
-/* From Andreas Dilger e-mail to png-implement, 26 March 1998:
- *
- *   In most cases, the "simple transparency" should be done prior to doing
- *   gray-to-RGB, or you will have to test 3x as many bytes to check if a
- *   pixel is transparent.  You would also need to make sure that the
- *   transparency information is upgraded to RGB.
- *
- *   To summarize, the current flow is:
- *   - Gray + simple transparency -> compare 1 or 2 gray bytes and composite
- *                                   with background "in place" if transparent,
- *                                   convert to RGB if necessary
- *   - Gray + alpha -> composite with gray background and remove alpha bytes,
- *                                   convert to RGB if necessary
- *
- *   To support RGB backgrounds for gray images we need:
- *   - Gray + simple transparency -> convert to RGB + simple transparency,
- *                                   compare 3 or 6 bytes and composite with
- *                                   background "in place" if transparent
- *                                   (3x compare/pixel compared to doing
- *                                   composite with gray bkgrnd)
- *   - Gray + alpha -> convert to RGB + alpha, composite with background and
- *                                   remove alpha bytes (3x float
- *                                   operations/pixel compared with composite
- *                                   on gray background)
- *
- *  Greg's change will do this.  The reason it wasn't done before is for
- *  performance, as this increases the per-pixel operations.  If we would check
- *  in advance if the background was gray or RGB, and position the gray-to-RGB
- *  transform appropriately, then it would save a lot of work/time.
- */
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /* If gray -> RGB, do so now only if background is non-gray; else do later
-    * for performance reasons
-    */
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 &&
-       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) == 0)
-      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
-#endif
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
-      png_do_compose(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   if ((png_ptr->transformations & PNG_GAMMA) != 0 &&
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-      /* Because RGB_TO_GRAY does the gamma transform. */
-      (png_ptr->transformations & PNG_RGB_TO_GRAY) == 0 &&
-#endif
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-      /* Because PNG_COMPOSE does the gamma transform if there is something to
-       * do (if there is an alpha channel or transparency.)
-       */
-       !((png_ptr->transformations & PNG_COMPOSE) != 0 &&
-       ((png_ptr->num_trans != 0) ||
-       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)) &&
-#endif
-      /* Because png_init_read_transformations transforms the palette, unless
-       * RGB_TO_GRAY will do the transform.
-       */
-       (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE))
-      png_do_gamma(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
-       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
-       (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-       row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-          0 /* at_start == false, because SWAP_ALPHA happens later */);
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-   if ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 &&
-       (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      png_do_encode_alpha(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-   if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0)
-      png_do_scale_16_to_8(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-   /* There is no harm in doing both of these because only one has any effect,
-    * by putting the 'scale' option first if the app asks for scale (either by
-    * calling the API or in a TRANSFORM flag) this is what happens.
-    */
-   if ((png_ptr->transformations & PNG_16_TO_8) != 0)
-      png_do_chop(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   if ((png_ptr->transformations & PNG_QUANTIZE) != 0)
-   {
-      png_do_quantize(row_info, png_ptr->row_buf + 1,
-          png_ptr->palette_lookup, png_ptr->quantize_index);
-
-      if (row_info->rowbytes == 0)
-         png_error(png_ptr, "png_do_quantize returned rowbytes=0");
-   }
-#endif /* READ_QUANTIZE */
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   /* Do the expansion now, after all the arithmetic has been done.  Notice
-    * that previous transformations can handle the PNG_EXPAND_16 flag if this
-    * is efficient (particularly true in the case of gamma correction, where
-    * better accuracy results faster!)
-    */
-   if ((png_ptr->transformations & PNG_EXPAND_16) != 0)
-      png_do_expand_16(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /* NOTE: moved here in 1.5.4 (from much later in this list.) */
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 &&
-       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) != 0)
-      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_INVERT_SUPPORTED
-   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
-      png_do_invert(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
-      png_do_read_invert_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-   if ((png_ptr->transformations & PNG_SHIFT) != 0)
-      png_do_unshift(row_info, png_ptr->row_buf + 1,
-          &(png_ptr->shift));
-#endif
-
-#ifdef PNG_READ_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) != 0)
-      png_do_unpack(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Added at libpng-1.5.10 */
-   if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
-       png_ptr->num_palette_max >= 0)
-      png_do_check_palette_indexes(png_ptr, row_info);
-#endif
-
-#ifdef PNG_READ_BGR_SUPPORTED
-   if ((png_ptr->transformations & PNG_BGR) != 0)
-      png_do_bgr(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-      png_do_packswap(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-   if ((png_ptr->transformations & PNG_FILLER) != 0)
-      png_do_read_filler(row_info, png_ptr->row_buf + 1,
-          (png_uint_32)png_ptr->filler, png_ptr->flags);
-#endif
-
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0)
-      png_do_read_swap_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-#ifdef PNG_READ_SWAP_SUPPORTED
-   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
-      png_do_swap(row_info, png_ptr->row_buf + 1);
-#endif
-#endif
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
-   {
-      if (png_ptr->read_user_transform_fn != NULL)
-         (*(png_ptr->read_user_transform_fn)) /* User read transform function */
-             (png_ptr,     /* png_ptr */
-             row_info,     /* row_info: */
-                /*  png_uint_32 width;       width of row */
-                /*  size_t rowbytes;         number of bytes in row */
-                /*  png_byte color_type;     color type of pixels */
-                /*  png_byte bit_depth;      bit depth of samples */
-                /*  png_byte channels;       number of channels (1-4) */
-                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
-             png_ptr->row_buf + 1);    /* start of pixel data for row */
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-      if (png_ptr->user_transform_depth != 0)
-         row_info->bit_depth = png_ptr->user_transform_depth;
-
-      if (png_ptr->user_transform_channels != 0)
-         row_info->channels = png_ptr->user_transform_channels;
-#endif
-      row_info->pixel_depth = (png_byte)(row_info->bit_depth *
-          row_info->channels);
-
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_info->width);
-   }
-#endif
-}
-
-#endif /* READ_TRANSFORMS */
-#endif /* READ */
diff --git a/dep/libpng/src/pngrutil.c b/dep/libpng/src/pngrutil.c
deleted file mode 100644
index d31dc21da..000000000
--- a/dep/libpng/src/pngrutil.c
+++ /dev/null
@@ -1,4680 +0,0 @@
-
-/* pngrutil.c - utilities to read a PNG file
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file contains routines that are only called from within
- * libpng itself during the course of reading an image.
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-png_uint_32 PNGAPI
-png_get_uint_31(png_const_structrp png_ptr, png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-
-   if (uval > PNG_UINT_31_MAX)
-      png_error(png_ptr, "PNG unsigned integer out of range");
-
-   return uval;
-}
-
-#if defined(PNG_READ_gAMA_SUPPORTED) || defined(PNG_READ_cHRM_SUPPORTED)
-/* The following is a variation on the above for use with the fixed
- * point values used for gAMA and cHRM.  Instead of png_error it
- * issues a warning and returns (-1) - an invalid value because both
- * gAMA and cHRM use *unsigned* integers for fixed point values.
- */
-#define PNG_FIXED_ERROR (-1)
-
-static png_fixed_point /* PRIVATE */
-png_get_fixed_point(png_structrp png_ptr, png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-
-   if (uval <= PNG_UINT_31_MAX)
-      return (png_fixed_point)uval; /* known to be in range */
-
-   /* The caller can turn off the warning by passing NULL. */
-   if (png_ptr != NULL)
-      png_warning(png_ptr, "PNG fixed point integer out of range");
-
-   return PNG_FIXED_ERROR;
-}
-#endif
-
-#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
-/* NOTE: the read macros will obscure these definitions, so that if
- * PNG_USE_READ_MACROS is set the library will not use them internally,
- * but the APIs will still be available externally.
- *
- * The parentheses around "PNGAPI function_name" in the following three
- * functions are necessary because they allow the macros to co-exist with
- * these (unused but exported) functions.
- */
-
-/* Grab an unsigned 32-bit integer from a buffer in big-endian format. */
-png_uint_32 (PNGAPI
-png_get_uint_32)(png_const_bytep buf)
-{
-   png_uint_32 uval =
-       ((png_uint_32)(*(buf    )) << 24) +
-       ((png_uint_32)(*(buf + 1)) << 16) +
-       ((png_uint_32)(*(buf + 2)) <<  8) +
-       ((png_uint_32)(*(buf + 3))      ) ;
-
-   return uval;
-}
-
-/* Grab a signed 32-bit integer from a buffer in big-endian format.  The
- * data is stored in the PNG file in two's complement format and there
- * is no guarantee that a 'png_int_32' is exactly 32 bits, therefore
- * the following code does a two's complement to native conversion.
- */
-png_int_32 (PNGAPI
-png_get_int_32)(png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-   if ((uval & 0x80000000) == 0) /* non-negative */
-      return (png_int_32)uval;
-
-   uval = (uval ^ 0xffffffff) + 1;  /* 2's complement: -x = ~x+1 */
-   if ((uval & 0x80000000) == 0) /* no overflow */
-      return -(png_int_32)uval;
-   /* The following has to be safe; this function only gets called on PNG data
-    * and if we get here that data is invalid.  0 is the most safe value and
-    * if not then an attacker would surely just generate a PNG with 0 instead.
-    */
-   return 0;
-}
-
-/* Grab an unsigned 16-bit integer from a buffer in big-endian format. */
-png_uint_16 (PNGAPI
-png_get_uint_16)(png_const_bytep buf)
-{
-   /* ANSI-C requires an int value to accommodate at least 16 bits so this
-    * works and allows the compiler not to worry about possible narrowing
-    * on 32-bit systems.  (Pre-ANSI systems did not make integers smaller
-    * than 16 bits either.)
-    */
-   unsigned int val =
-       ((unsigned int)(*buf) << 8) +
-       ((unsigned int)(*(buf + 1)));
-
-   return (png_uint_16)val;
-}
-
-#endif /* READ_INT_FUNCTIONS */
-
-/* Read and check the PNG file signature */
-void /* PRIVATE */
-png_read_sig(png_structrp png_ptr, png_inforp info_ptr)
-{
-   size_t num_checked, num_to_check;
-
-   /* Exit if the user application does not expect a signature. */
-   if (png_ptr->sig_bytes >= 8)
-      return;
-
-   num_checked = png_ptr->sig_bytes;
-   num_to_check = 8 - num_checked;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_SIGNATURE;
-#endif
-
-   /* The signature must be serialized in a single I/O call. */
-   png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check);
-   png_ptr->sig_bytes = 8;
-
-   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0)
-   {
-      if (num_checked < 4 &&
-          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0)
-         png_error(png_ptr, "Not a PNG file");
-      else
-         png_error(png_ptr, "PNG file corrupted by ASCII conversion");
-   }
-   if (num_checked < 3)
-      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
-}
-
-/* Read the chunk header (length + type name).
- * Put the type name into png_ptr->chunk_name, and return the length.
- */
-png_uint_32 /* PRIVATE */
-png_read_chunk_header(png_structrp png_ptr)
-{
-   png_byte buf[8];
-   png_uint_32 length;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_HDR;
-#endif
-
-   /* Read the length and the chunk name.
-    * This must be performed in a single I/O call.
-    */
-   png_read_data(png_ptr, buf, 8);
-   length = png_get_uint_31(png_ptr, buf);
-
-   /* Put the chunk name into png_ptr->chunk_name. */
-   png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(buf+4);
-
-   png_debug2(0, "Reading chunk typeid = 0x%lx, length = %lu",
-       (unsigned long)png_ptr->chunk_name, (unsigned long)length);
-
-   /* Reset the crc and run it over the chunk name. */
-   png_reset_crc(png_ptr);
-   png_calculate_crc(png_ptr, buf + 4, 4);
-
-   /* Check to see if chunk name is valid. */
-   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
-
-   /* Check for too-large chunk length */
-   png_check_chunk_length(png_ptr, length);
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_DATA;
-#endif
-
-   return length;
-}
-
-/* Read data, and (optionally) run it through the CRC. */
-void /* PRIVATE */
-png_crc_read(png_structrp png_ptr, png_bytep buf, png_uint_32 length)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_read_data(png_ptr, buf, length);
-   png_calculate_crc(png_ptr, buf, length);
-}
-
-/* Optionally skip data and then check the CRC.  Depending on whether we
- * are reading an ancillary or critical chunk, and how the program has set
- * things up, we may calculate the CRC on the data and print a message.
- * Returns '1' if there was a CRC error, '0' otherwise.
- */
-int /* PRIVATE */
-png_crc_finish(png_structrp png_ptr, png_uint_32 skip)
-{
-   /* The size of the local buffer for inflate is a good guess as to a
-    * reasonable size to use for buffering reads from the application.
-    */
-   while (skip > 0)
-   {
-      png_uint_32 len;
-      png_byte tmpbuf[PNG_INFLATE_BUF_SIZE];
-
-      len = (sizeof tmpbuf);
-      if (len > skip)
-         len = skip;
-      skip -= len;
-
-      png_crc_read(png_ptr, tmpbuf, len);
-   }
-
-   if (png_crc_error(png_ptr) != 0)
-   {
-      if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0 ?
-          (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0 :
-          (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE) != 0)
-      {
-         png_chunk_warning(png_ptr, "CRC error");
-      }
-
-      else
-         png_chunk_error(png_ptr, "CRC error");
-
-      return 1;
-   }
-
-   return 0;
-}
-
-/* Compare the CRC stored in the PNG file with that calculated by libpng from
- * the data it has read thus far.
- */
-int /* PRIVATE */
-png_crc_error(png_structrp png_ptr)
-{
-   png_byte crc_bytes[4];
-   png_uint_32 crc;
-   int need_crc = 1;
-
-   if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0)
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
-          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
-         need_crc = 0;
-   }
-
-   else /* critical */
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0)
-         need_crc = 0;
-   }
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_CRC;
-#endif
-
-   /* The chunk CRC must be serialized in a single I/O call. */
-   png_read_data(png_ptr, crc_bytes, 4);
-
-   if (need_crc != 0)
-   {
-      crc = png_get_uint_32(crc_bytes);
-      return crc != png_ptr->crc;
-   }
-
-   else
-      return 0;
-}
-
-#if defined(PNG_READ_iCCP_SUPPORTED) || defined(PNG_READ_iTXt_SUPPORTED) ||\
-    defined(PNG_READ_pCAL_SUPPORTED) || defined(PNG_READ_sCAL_SUPPORTED) ||\
-    defined(PNG_READ_sPLT_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) ||\
-    defined(PNG_READ_zTXt_SUPPORTED) || defined(PNG_SEQUENTIAL_READ_SUPPORTED)
-/* Manage the read buffer; this simply reallocates the buffer if it is not small
- * enough (or if it is not allocated).  The routine returns a pointer to the
- * buffer; if an error occurs and 'warn' is set the routine returns NULL, else
- * it will call png_error (via png_malloc) on failure.  (warn == 2 means
- * 'silent').
- */
-static png_bytep
-png_read_buffer(png_structrp png_ptr, png_alloc_size_t new_size, int warn)
-{
-   png_bytep buffer = png_ptr->read_buffer;
-
-   if (buffer != NULL && new_size > png_ptr->read_buffer_size)
-   {
-      png_ptr->read_buffer = NULL;
-      png_ptr->read_buffer_size = 0;
-      png_free(png_ptr, buffer);
-      buffer = NULL;
-   }
-
-   if (buffer == NULL)
-   {
-      buffer = png_voidcast(png_bytep, png_malloc_base(png_ptr, new_size));
-
-      if (buffer != NULL)
-      {
-         memset(buffer, 0, new_size); /* just in case */
-         png_ptr->read_buffer = buffer;
-         png_ptr->read_buffer_size = new_size;
-      }
-
-      else if (warn < 2) /* else silent */
-      {
-         if (warn != 0)
-             png_chunk_warning(png_ptr, "insufficient memory to read chunk");
-
-         else
-             png_chunk_error(png_ptr, "insufficient memory to read chunk");
-      }
-   }
-
-   return buffer;
-}
-#endif /* READ_iCCP|iTXt|pCAL|sCAL|sPLT|tEXt|zTXt|SEQUENTIAL_READ */
-
-/* png_inflate_claim: claim the zstream for some nefarious purpose that involves
- * decompression.  Returns Z_OK on success, else a zlib error code.  It checks
- * the owner but, in final release builds, just issues a warning if some other
- * chunk apparently owns the stream.  Prior to release it does a png_error.
- */
-static int
-png_inflate_claim(png_structrp png_ptr, png_uint_32 owner)
-{
-   if (png_ptr->zowner != 0)
-   {
-      char msg[64];
-
-      PNG_STRING_FROM_CHUNK(msg, png_ptr->zowner);
-      /* So the message that results is "<chunk> using zstream"; this is an
-       * internal error, but is very useful for debugging.  i18n requirements
-       * are minimal.
-       */
-      (void)png_safecat(msg, (sizeof msg), 4, " using zstream");
-#if PNG_RELEASE_BUILD
-      png_chunk_warning(png_ptr, msg);
-      png_ptr->zowner = 0;
-#else
-      png_chunk_error(png_ptr, msg);
-#endif
-   }
-
-   /* Implementation note: unlike 'png_deflate_claim' this internal function
-    * does not take the size of the data as an argument.  Some efficiency could
-    * be gained by using this when it is known *if* the zlib stream itself does
-    * not record the number; however, this is an illusion: the original writer
-    * of the PNG may have selected a lower window size, and we really must
-    * follow that because, for systems with with limited capabilities, we
-    * would otherwise reject the application's attempts to use a smaller window
-    * size (zlib doesn't have an interface to say "this or lower"!).
-    *
-    * inflateReset2 was added to zlib 1.2.4; before this the window could not be
-    * reset, therefore it is necessary to always allocate the maximum window
-    * size with earlier zlibs just in case later compressed chunks need it.
-    */
-   {
-      int ret; /* zlib return code */
-#if ZLIB_VERNUM >= 0x1240
-      int window_bits = 0;
-
-# if defined(PNG_SET_OPTION_SUPPORTED) && defined(PNG_MAXIMUM_INFLATE_WINDOW)
-      if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) ==
-          PNG_OPTION_ON)
-      {
-         window_bits = 15;
-         png_ptr->zstream_start = 0; /* fixed window size */
-      }
-
-      else
-      {
-         png_ptr->zstream_start = 1;
-      }
-# endif
-
-#endif /* ZLIB_VERNUM >= 0x1240 */
-
-      /* Set this for safety, just in case the previous owner left pointers to
-       * memory allocations.
-       */
-      png_ptr->zstream.next_in = NULL;
-      png_ptr->zstream.avail_in = 0;
-      png_ptr->zstream.next_out = NULL;
-      png_ptr->zstream.avail_out = 0;
-
-      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
-      {
-#if ZLIB_VERNUM >= 0x1240
-         ret = inflateReset2(&png_ptr->zstream, window_bits);
-#else
-         ret = inflateReset(&png_ptr->zstream);
-#endif
-      }
-
-      else
-      {
-#if ZLIB_VERNUM >= 0x1240
-         ret = inflateInit2(&png_ptr->zstream, window_bits);
-#else
-         ret = inflateInit(&png_ptr->zstream);
-#endif
-
-         if (ret == Z_OK)
-            png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED;
-      }
-
-#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED
-      if (((png_ptr->options >> PNG_IGNORE_ADLER32) & 3) == PNG_OPTION_ON)
-         /* Turn off validation of the ADLER32 checksum in IDAT chunks */
-         ret = inflateValidate(&png_ptr->zstream, 0);
-#endif
-
-      if (ret == Z_OK)
-         png_ptr->zowner = owner;
-
-      else
-         png_zstream_error(png_ptr, ret);
-
-      return ret;
-   }
-
-#ifdef window_bits
-# undef window_bits
-#endif
-}
-
-#if ZLIB_VERNUM >= 0x1240
-/* Handle the start of the inflate stream if we called inflateInit2(strm,0);
- * in this case some zlib versions skip validation of the CINFO field and, in
- * certain circumstances, libpng may end up displaying an invalid image, in
- * contrast to implementations that call zlib in the normal way (e.g. libpng
- * 1.5).
- */
-int /* PRIVATE */
-png_zlib_inflate(png_structrp png_ptr, int flush)
-{
-   if (png_ptr->zstream_start && png_ptr->zstream.avail_in > 0)
-   {
-      if ((*png_ptr->zstream.next_in >> 4) > 7)
-      {
-         png_ptr->zstream.msg = "invalid window size (libpng)";
-         return Z_DATA_ERROR;
-      }
-
-      png_ptr->zstream_start = 0;
-   }
-
-   return inflate(&png_ptr->zstream, flush);
-}
-#endif /* Zlib >= 1.2.4 */
-
-#ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED
-#if defined(PNG_READ_zTXt_SUPPORTED) || defined (PNG_READ_iTXt_SUPPORTED)
-/* png_inflate now returns zlib error codes including Z_OK and Z_STREAM_END to
- * allow the caller to do multiple calls if required.  If the 'finish' flag is
- * set Z_FINISH will be passed to the final inflate() call and Z_STREAM_END must
- * be returned or there has been a problem, otherwise Z_SYNC_FLUSH is used and
- * Z_OK or Z_STREAM_END will be returned on success.
- *
- * The input and output sizes are updated to the actual amounts of data consumed
- * or written, not the amount available (as in a z_stream).  The data pointers
- * are not changed, so the next input is (data+input_size) and the next
- * available output is (output+output_size).
- */
-static int
-png_inflate(png_structrp png_ptr, png_uint_32 owner, int finish,
-    /* INPUT: */ png_const_bytep input, png_uint_32p input_size_ptr,
-    /* OUTPUT: */ png_bytep output, png_alloc_size_t *output_size_ptr)
-{
-   if (png_ptr->zowner == owner) /* Else not claimed */
-   {
-      int ret;
-      png_alloc_size_t avail_out = *output_size_ptr;
-      png_uint_32 avail_in = *input_size_ptr;
-
-      /* zlib can't necessarily handle more than 65535 bytes at once (i.e. it
-       * can't even necessarily handle 65536 bytes) because the type uInt is
-       * "16 bits or more".  Consequently it is necessary to chunk the input to
-       * zlib.  This code uses ZLIB_IO_MAX, from pngpriv.h, as the maximum (the
-       * maximum value that can be stored in a uInt.)  It is possible to set
-       * ZLIB_IO_MAX to a lower value in pngpriv.h and this may sometimes have
-       * a performance advantage, because it reduces the amount of data accessed
-       * at each step and that may give the OS more time to page it in.
-       */
-      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
-      /* avail_in and avail_out are set below from 'size' */
-      png_ptr->zstream.avail_in = 0;
-      png_ptr->zstream.avail_out = 0;
-
-      /* Read directly into the output if it is available (this is set to
-       * a local buffer below if output is NULL).
-       */
-      if (output != NULL)
-         png_ptr->zstream.next_out = output;
-
-      do
-      {
-         uInt avail;
-         Byte local_buffer[PNG_INFLATE_BUF_SIZE];
-
-         /* zlib INPUT BUFFER */
-         /* The setting of 'avail_in' used to be outside the loop; by setting it
-          * inside it is possible to chunk the input to zlib and simply rely on
-          * zlib to advance the 'next_in' pointer.  This allows arbitrary
-          * amounts of data to be passed through zlib at the unavoidable cost of
-          * requiring a window save (memcpy of up to 32768 output bytes)
-          * every ZLIB_IO_MAX input bytes.
-          */
-         avail_in += png_ptr->zstream.avail_in; /* not consumed last time */
-
-         avail = ZLIB_IO_MAX;
-
-         if (avail_in < avail)
-            avail = (uInt)avail_in; /* safe: < than ZLIB_IO_MAX */
-
-         avail_in -= avail;
-         png_ptr->zstream.avail_in = avail;
-
-         /* zlib OUTPUT BUFFER */
-         avail_out += png_ptr->zstream.avail_out; /* not written last time */
-
-         avail = ZLIB_IO_MAX; /* maximum zlib can process */
-
-         if (output == NULL)
-         {
-            /* Reset the output buffer each time round if output is NULL and
-             * make available the full buffer, up to 'remaining_space'
-             */
-            png_ptr->zstream.next_out = local_buffer;
-            if ((sizeof local_buffer) < avail)
-               avail = (sizeof local_buffer);
-         }
-
-         if (avail_out < avail)
-            avail = (uInt)avail_out; /* safe: < ZLIB_IO_MAX */
-
-         png_ptr->zstream.avail_out = avail;
-         avail_out -= avail;
-
-         /* zlib inflate call */
-         /* In fact 'avail_out' may be 0 at this point, that happens at the end
-          * of the read when the final LZ end code was not passed at the end of
-          * the previous chunk of input data.  Tell zlib if we have reached the
-          * end of the output buffer.
-          */
-         ret = PNG_INFLATE(png_ptr, avail_out > 0 ? Z_NO_FLUSH :
-             (finish ? Z_FINISH : Z_SYNC_FLUSH));
-      } while (ret == Z_OK);
-
-      /* For safety kill the local buffer pointer now */
-      if (output == NULL)
-         png_ptr->zstream.next_out = NULL;
-
-      /* Claw back the 'size' and 'remaining_space' byte counts. */
-      avail_in += png_ptr->zstream.avail_in;
-      avail_out += png_ptr->zstream.avail_out;
-
-      /* Update the input and output sizes; the updated values are the amount
-       * consumed or written, effectively the inverse of what zlib uses.
-       */
-      if (avail_out > 0)
-         *output_size_ptr -= avail_out;
-
-      if (avail_in > 0)
-         *input_size_ptr -= avail_in;
-
-      /* Ensure png_ptr->zstream.msg is set (even in the success case!) */
-      png_zstream_error(png_ptr, ret);
-      return ret;
-   }
-
-   else
-   {
-      /* This is a bad internal error.  The recovery assigns to the zstream msg
-       * pointer, which is not owned by the caller, but this is safe; it's only
-       * used on errors!
-       */
-      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
-      return Z_STREAM_ERROR;
-   }
-}
-
-/*
- * Decompress trailing data in a chunk.  The assumption is that read_buffer
- * points at an allocated area holding the contents of a chunk with a
- * trailing compressed part.  What we get back is an allocated area
- * holding the original prefix part and an uncompressed version of the
- * trailing part (the malloc area passed in is freed).
- */
-static int
-png_decompress_chunk(png_structrp png_ptr,
-    png_uint_32 chunklength, png_uint_32 prefix_size,
-    png_alloc_size_t *newlength /* must be initialized to the maximum! */,
-    int terminate /*add a '\0' to the end of the uncompressed data*/)
-{
-   /* TODO: implement different limits for different types of chunk.
-    *
-    * The caller supplies *newlength set to the maximum length of the
-    * uncompressed data, but this routine allocates space for the prefix and
-    * maybe a '\0' terminator too.  We have to assume that 'prefix_size' is
-    * limited only by the maximum chunk size.
-    */
-   png_alloc_size_t limit = PNG_SIZE_MAX;
-
-# ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_malloc_max > 0 &&
-       png_ptr->user_chunk_malloc_max < limit)
-      limit = png_ptr->user_chunk_malloc_max;
-# elif PNG_USER_CHUNK_MALLOC_MAX > 0
-   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
-      limit = PNG_USER_CHUNK_MALLOC_MAX;
-# endif
-
-   if (limit >= prefix_size + (terminate != 0))
-   {
-      int ret;
-
-      limit -= prefix_size + (terminate != 0);
-
-      if (limit < *newlength)
-         *newlength = limit;
-
-      /* Now try to claim the stream. */
-      ret = png_inflate_claim(png_ptr, png_ptr->chunk_name);
-
-      if (ret == Z_OK)
-      {
-         png_uint_32 lzsize = chunklength - prefix_size;
-
-         ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/,
-             /* input: */ png_ptr->read_buffer + prefix_size, &lzsize,
-             /* output: */ NULL, newlength);
-
-         if (ret == Z_STREAM_END)
-         {
-            /* Use 'inflateReset' here, not 'inflateReset2' because this
-             * preserves the previously decided window size (otherwise it would
-             * be necessary to store the previous window size.)  In practice
-             * this doesn't matter anyway, because png_inflate will call inflate
-             * with Z_FINISH in almost all cases, so the window will not be
-             * maintained.
-             */
-            if (inflateReset(&png_ptr->zstream) == Z_OK)
-            {
-               /* Because of the limit checks above we know that the new,
-                * expanded, size will fit in a size_t (let alone an
-                * png_alloc_size_t).  Use png_malloc_base here to avoid an
-                * extra OOM message.
-                */
-               png_alloc_size_t new_size = *newlength;
-               png_alloc_size_t buffer_size = prefix_size + new_size +
-                   (terminate != 0);
-               png_bytep text = png_voidcast(png_bytep, png_malloc_base(png_ptr,
-                   buffer_size));
-
-               if (text != NULL)
-               {
-                  memset(text, 0, buffer_size);
-
-                  ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/,
-                      png_ptr->read_buffer + prefix_size, &lzsize,
-                      text + prefix_size, newlength);
-
-                  if (ret == Z_STREAM_END)
-                  {
-                     if (new_size == *newlength)
-                     {
-                        if (terminate != 0)
-                           text[prefix_size + *newlength] = 0;
-
-                        if (prefix_size > 0)
-                           memcpy(text, png_ptr->read_buffer, prefix_size);
-
-                        {
-                           png_bytep old_ptr = png_ptr->read_buffer;
-
-                           png_ptr->read_buffer = text;
-                           png_ptr->read_buffer_size = buffer_size;
-                           text = old_ptr; /* freed below */
-                        }
-                     }
-
-                     else
-                     {
-                        /* The size changed on the second read, there can be no
-                         * guarantee that anything is correct at this point.
-                         * The 'msg' pointer has been set to "unexpected end of
-                         * LZ stream", which is fine, but return an error code
-                         * that the caller won't accept.
-                         */
-                        ret = PNG_UNEXPECTED_ZLIB_RETURN;
-                     }
-                  }
-
-                  else if (ret == Z_OK)
-                     ret = PNG_UNEXPECTED_ZLIB_RETURN; /* for safety */
-
-                  /* Free the text pointer (this is the old read_buffer on
-                   * success)
-                   */
-                  png_free(png_ptr, text);
-
-                  /* This really is very benign, but it's still an error because
-                   * the extra space may otherwise be used as a Trojan Horse.
-                   */
-                  if (ret == Z_STREAM_END &&
-                      chunklength - prefix_size != lzsize)
-                     png_chunk_benign_error(png_ptr, "extra compressed data");
-               }
-
-               else
-               {
-                  /* Out of memory allocating the buffer */
-                  ret = Z_MEM_ERROR;
-                  png_zstream_error(png_ptr, Z_MEM_ERROR);
-               }
-            }
-
-            else
-            {
-               /* inflateReset failed, store the error message */
-               png_zstream_error(png_ptr, ret);
-               ret = PNG_UNEXPECTED_ZLIB_RETURN;
-            }
-         }
-
-         else if (ret == Z_OK)
-            ret = PNG_UNEXPECTED_ZLIB_RETURN;
-
-         /* Release the claimed stream */
-         png_ptr->zowner = 0;
-      }
-
-      else /* the claim failed */ if (ret == Z_STREAM_END) /* impossible! */
-         ret = PNG_UNEXPECTED_ZLIB_RETURN;
-
-      return ret;
-   }
-
-   else
-   {
-      /* Application/configuration limits exceeded */
-      png_zstream_error(png_ptr, Z_MEM_ERROR);
-      return Z_MEM_ERROR;
-   }
-}
-#endif /* READ_zTXt || READ_iTXt */
-#endif /* READ_COMPRESSED_TEXT */
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-/* Perform a partial read and decompress, producing 'avail_out' bytes and
- * reading from the current chunk as required.
- */
-static int
-png_inflate_read(png_structrp png_ptr, png_bytep read_buffer, uInt read_size,
-    png_uint_32p chunk_bytes, png_bytep next_out, png_alloc_size_t *out_size,
-    int finish)
-{
-   if (png_ptr->zowner == png_ptr->chunk_name)
-   {
-      int ret;
-
-      /* next_in and avail_in must have been initialized by the caller. */
-      png_ptr->zstream.next_out = next_out;
-      png_ptr->zstream.avail_out = 0; /* set in the loop */
-
-      do
-      {
-         if (png_ptr->zstream.avail_in == 0)
-         {
-            if (read_size > *chunk_bytes)
-               read_size = (uInt)*chunk_bytes;
-            *chunk_bytes -= read_size;
-
-            if (read_size > 0)
-               png_crc_read(png_ptr, read_buffer, read_size);
-
-            png_ptr->zstream.next_in = read_buffer;
-            png_ptr->zstream.avail_in = read_size;
-         }
-
-         if (png_ptr->zstream.avail_out == 0)
-         {
-            uInt avail = ZLIB_IO_MAX;
-            if (avail > *out_size)
-               avail = (uInt)*out_size;
-            *out_size -= avail;
-
-            png_ptr->zstream.avail_out = avail;
-         }
-
-         /* Use Z_SYNC_FLUSH when there is no more chunk data to ensure that all
-          * the available output is produced; this allows reading of truncated
-          * streams.
-          */
-         ret = PNG_INFLATE(png_ptr, *chunk_bytes > 0 ?
-             Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH));
-      }
-      while (ret == Z_OK && (*out_size > 0 || png_ptr->zstream.avail_out > 0));
-
-      *out_size += png_ptr->zstream.avail_out;
-      png_ptr->zstream.avail_out = 0; /* Should not be required, but is safe */
-
-      /* Ensure the error message pointer is always set: */
-      png_zstream_error(png_ptr, ret);
-      return ret;
-   }
-
-   else
-   {
-      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
-      return Z_STREAM_ERROR;
-   }
-}
-#endif /* READ_iCCP */
-
-/* Read and check the IDHR chunk */
-
-void /* PRIVATE */
-png_handle_IHDR(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte buf[13];
-   png_uint_32 width, height;
-   int bit_depth, color_type, compression_type, filter_type;
-   int interlace_type;
-
-   png_debug(1, "in png_handle_IHDR");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) != 0)
-      png_chunk_error(png_ptr, "out of place");
-
-   /* Check the length */
-   if (length != 13)
-      png_chunk_error(png_ptr, "invalid");
-
-   png_ptr->mode |= PNG_HAVE_IHDR;
-
-   png_crc_read(png_ptr, buf, 13);
-   png_crc_finish(png_ptr, 0);
-
-   width = png_get_uint_31(png_ptr, buf);
-   height = png_get_uint_31(png_ptr, buf + 4);
-   bit_depth = buf[8];
-   color_type = buf[9];
-   compression_type = buf[10];
-   filter_type = buf[11];
-   interlace_type = buf[12];
-
-   /* Set internal variables */
-   png_ptr->width = width;
-   png_ptr->height = height;
-   png_ptr->bit_depth = (png_byte)bit_depth;
-   png_ptr->interlaced = (png_byte)interlace_type;
-   png_ptr->color_type = (png_byte)color_type;
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_ptr->filter_type = (png_byte)filter_type;
-#endif
-   png_ptr->compression_type = (png_byte)compression_type;
-
-   /* Find number of channels */
-   switch (png_ptr->color_type)
-   {
-      default: /* invalid, png_set_IHDR calls png_error */
-      case PNG_COLOR_TYPE_GRAY:
-      case PNG_COLOR_TYPE_PALETTE:
-         png_ptr->channels = 1;
-         break;
-
-      case PNG_COLOR_TYPE_RGB:
-         png_ptr->channels = 3;
-         break;
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-         png_ptr->channels = 2;
-         break;
-
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-         png_ptr->channels = 4;
-         break;
-   }
-
-   /* Set up other useful info */
-   png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth * png_ptr->channels);
-   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->width);
-   png_debug1(3, "bit_depth = %d", png_ptr->bit_depth);
-   png_debug1(3, "channels = %d", png_ptr->channels);
-   png_debug1(3, "rowbytes = %lu", (unsigned long)png_ptr->rowbytes);
-   png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth,
-       color_type, interlace_type, compression_type, filter_type);
-}
-
-/* Read and check the palette */
-void /* PRIVATE */
-png_handle_PLTE(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_color palette[PNG_MAX_PALETTE_LENGTH];
-   int max_palette_length, num, i;
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   png_colorp pal_ptr;
-#endif
-
-   png_debug(1, "in png_handle_PLTE");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   /* Moved to before the 'after IDAT' check below because otherwise duplicate
-    * PLTE chunks are potentially ignored (the spec says there shall not be more
-    * than one PLTE, the error is not treated as benign, so this check trumps
-    * the requirement that PLTE appears before IDAT.)
-    */
-   else if ((png_ptr->mode & PNG_HAVE_PLTE) != 0)
-      png_chunk_error(png_ptr, "duplicate");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      /* This is benign because the non-benign error happened before, when an
-       * IDAT was encountered in a color-mapped image with no PLTE.
-       */
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   png_ptr->mode |= PNG_HAVE_PLTE;
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "ignored in grayscale PNG");
-      return;
-   }
-
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-#endif
-
-   if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3)
-   {
-      png_crc_finish(png_ptr, length);
-
-      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
-         png_chunk_benign_error(png_ptr, "invalid");
-
-      else
-         png_chunk_error(png_ptr, "invalid");
-
-      return;
-   }
-
-   /* The cast is safe because 'length' is less than 3*PNG_MAX_PALETTE_LENGTH */
-   num = (int)length / 3;
-
-   /* If the palette has 256 or fewer entries but is too large for the bit
-    * depth, we don't issue an error, to preserve the behavior of previous
-    * libpng versions. We silently truncate the unused extra palette entries
-    * here.
-    */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      max_palette_length = (1 << png_ptr->bit_depth);
-   else
-      max_palette_length = PNG_MAX_PALETTE_LENGTH;
-
-   if (num > max_palette_length)
-      num = max_palette_length;
-
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++)
-   {
-      png_byte buf[3];
-
-      png_crc_read(png_ptr, buf, 3);
-      pal_ptr->red = buf[0];
-      pal_ptr->green = buf[1];
-      pal_ptr->blue = buf[2];
-   }
-#else
-   for (i = 0; i < num; i++)
-   {
-      png_byte buf[3];
-
-      png_crc_read(png_ptr, buf, 3);
-      /* Don't depend upon png_color being any order */
-      palette[i].red = buf[0];
-      palette[i].green = buf[1];
-      palette[i].blue = buf[2];
-   }
-#endif
-
-   /* If we actually need the PLTE chunk (ie for a paletted image), we do
-    * whatever the normal CRC configuration tells us.  However, if we
-    * have an RGB image, the PLTE can be considered ancillary, so
-    * we will act as though it is.
-    */
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-#endif
-   {
-      png_crc_finish(png_ptr, (png_uint_32) (length - (unsigned int)num * 3));
-   }
-
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   else if (png_crc_error(png_ptr) != 0)  /* Only if we have a CRC error */
-   {
-      /* If we don't want to use the data from an ancillary chunk,
-       * we have two options: an error abort, or a warning and we
-       * ignore the data in this chunk (which should be OK, since
-       * it's considered ancillary for a RGB or RGBA image).
-       *
-       * IMPLEMENTATION NOTE: this is only here because png_crc_finish uses the
-       * chunk type to determine whether to check the ancillary or the critical
-       * flags.
-       */
-      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE) == 0)
-      {
-         if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) != 0)
-            return;
-
-         else
-            png_chunk_error(png_ptr, "CRC error");
-      }
-
-      /* Otherwise, we (optionally) emit a warning and use the chunk. */
-      else if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0)
-         png_chunk_warning(png_ptr, "CRC error");
-   }
-#endif
-
-   /* TODO: png_set_PLTE has the side effect of setting png_ptr->palette to its
-    * own copy of the palette.  This has the side effect that when png_start_row
-    * is called (this happens after any call to png_read_update_info) the
-    * info_ptr palette gets changed.  This is extremely unexpected and
-    * confusing.
-    *
-    * Fix this by not sharing the palette in this way.
-    */
-   png_set_PLTE(png_ptr, info_ptr, palette, num);
-
-   /* The three chunks, bKGD, hIST and tRNS *must* appear after PLTE and before
-    * IDAT.  Prior to 1.6.0 this was not checked; instead the code merely
-    * checked the apparent validity of a tRNS chunk inserted before PLTE on a
-    * palette PNG.  1.6.0 attempts to rigorously follow the standard and
-    * therefore does a benign error if the erroneous condition is detected *and*
-    * cancels the tRNS if the benign error returns.  The alternative is to
-    * amend the standard since it would be rather hypocritical of the standards
-    * maintainers to ignore it.
-    */
-#ifdef PNG_READ_tRNS_SUPPORTED
-   if (png_ptr->num_trans > 0 ||
-       (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0))
-   {
-      /* Cancel this because otherwise it would be used if the transforms
-       * require it.  Don't cancel the 'valid' flag because this would prevent
-       * detection of duplicate chunks.
-       */
-      png_ptr->num_trans = 0;
-
-      if (info_ptr != NULL)
-         info_ptr->num_trans = 0;
-
-      png_chunk_benign_error(png_ptr, "tRNS must be after");
-   }
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0)
-      png_chunk_benign_error(png_ptr, "hIST must be after");
-#endif
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0)
-      png_chunk_benign_error(png_ptr, "bKGD must be after");
-#endif
-}
-
-void /* PRIVATE */
-png_handle_IEND(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_debug(1, "in png_handle_IEND");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0 ||
-       (png_ptr->mode & PNG_HAVE_IDAT) == 0)
-      png_chunk_error(png_ptr, "out of place");
-
-   png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND);
-
-   png_crc_finish(png_ptr, length);
-
-   if (length != 0)
-      png_chunk_benign_error(png_ptr, "invalid");
-
-   PNG_UNUSED(info_ptr)
-}
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-void /* PRIVATE */
-png_handle_gAMA(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_fixed_point igamma;
-   png_byte buf[4];
-
-   png_debug(1, "in png_handle_gAMA");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   if (length != 4)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 4);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   igamma = png_get_fixed_point(NULL, buf);
-
-   png_colorspace_set_gamma(png_ptr, &png_ptr->colorspace, igamma);
-   png_colorspace_sync(png_ptr, info_ptr);
-}
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-void /* PRIVATE */
-png_handle_sBIT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   unsigned int truelen, i;
-   png_byte sample_depth;
-   png_byte buf[4];
-
-   png_debug(1, "in png_handle_sBIT");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      truelen = 3;
-      sample_depth = 8;
-   }
-
-   else
-   {
-      truelen = png_ptr->channels;
-      sample_depth = png_ptr->bit_depth;
-   }
-
-   if (length != truelen || length > 4)
-   {
-      png_chunk_benign_error(png_ptr, "invalid");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   buf[0] = buf[1] = buf[2] = buf[3] = sample_depth;
-   png_crc_read(png_ptr, buf, truelen);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   for (i=0; i<truelen; ++i)
-   {
-      if (buf[i] == 0 || buf[i] > sample_depth)
-      {
-         png_chunk_benign_error(png_ptr, "invalid");
-         return;
-      }
-   }
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      png_ptr->sig_bit.red = buf[0];
-      png_ptr->sig_bit.green = buf[1];
-      png_ptr->sig_bit.blue = buf[2];
-      png_ptr->sig_bit.alpha = buf[3];
-   }
-
-   else
-   {
-      png_ptr->sig_bit.gray = buf[0];
-      png_ptr->sig_bit.red = buf[0];
-      png_ptr->sig_bit.green = buf[0];
-      png_ptr->sig_bit.blue = buf[0];
-      png_ptr->sig_bit.alpha = buf[1];
-   }
-
-   png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit));
-}
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-void /* PRIVATE */
-png_handle_cHRM(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte buf[32];
-   png_xy xy;
-
-   png_debug(1, "in png_handle_cHRM");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   if (length != 32)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 32);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   xy.whitex = png_get_fixed_point(NULL, buf);
-   xy.whitey = png_get_fixed_point(NULL, buf + 4);
-   xy.redx   = png_get_fixed_point(NULL, buf + 8);
-   xy.redy   = png_get_fixed_point(NULL, buf + 12);
-   xy.greenx = png_get_fixed_point(NULL, buf + 16);
-   xy.greeny = png_get_fixed_point(NULL, buf + 20);
-   xy.bluex  = png_get_fixed_point(NULL, buf + 24);
-   xy.bluey  = png_get_fixed_point(NULL, buf + 28);
-
-   if (xy.whitex == PNG_FIXED_ERROR ||
-       xy.whitey == PNG_FIXED_ERROR ||
-       xy.redx   == PNG_FIXED_ERROR ||
-       xy.redy   == PNG_FIXED_ERROR ||
-       xy.greenx == PNG_FIXED_ERROR ||
-       xy.greeny == PNG_FIXED_ERROR ||
-       xy.bluex  == PNG_FIXED_ERROR ||
-       xy.bluey  == PNG_FIXED_ERROR)
-   {
-      png_chunk_benign_error(png_ptr, "invalid values");
-      return;
-   }
-
-   /* If a colorspace error has already been output skip this chunk */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
-      return;
-
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0)
-   {
-      png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
-      png_colorspace_sync(png_ptr, info_ptr);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   png_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
-   (void)png_colorspace_set_chromaticities(png_ptr, &png_ptr->colorspace, &xy,
-       1/*prefer cHRM values*/);
-   png_colorspace_sync(png_ptr, info_ptr);
-}
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-void /* PRIVATE */
-png_handle_sRGB(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte intent;
-
-   png_debug(1, "in png_handle_sRGB");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   if (length != 1)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, &intent, 1);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   /* If a colorspace error has already been output skip this chunk */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
-      return;
-
-   /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect
-    * this.
-    */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) != 0)
-   {
-      png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
-      png_colorspace_sync(png_ptr, info_ptr);
-      png_chunk_benign_error(png_ptr, "too many profiles");
-      return;
-   }
-
-   (void)png_colorspace_set_sRGB(png_ptr, &png_ptr->colorspace, intent);
-   png_colorspace_sync(png_ptr, info_ptr);
-}
-#endif /* READ_sRGB */
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-void /* PRIVATE */
-png_handle_iCCP(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-/* Note: this does not properly handle profiles that are > 64K under DOS */
-{
-   png_const_charp errmsg = NULL; /* error message output, or no error */
-   int finished = 0; /* crc checked */
-
-   png_debug(1, "in png_handle_iCCP");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   /* Consistent with all the above colorspace handling an obviously *invalid*
-    * chunk is just ignored, so does not invalidate the color space.  An
-    * alternative is to set the 'invalid' flags at the start of this routine
-    * and only clear them in they were not set before and all the tests pass.
-    */
-
-   /* The keyword must be at least one character and there is a
-    * terminator (0) byte and the compression method byte, and the
-    * 'zlib' datastream is at least 11 bytes.
-    */
-   if (length < 14)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "too short");
-      return;
-   }
-
-   /* If a colorspace error has already been output skip this chunk */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect
-    * this.
-    */
-   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) == 0)
-   {
-      uInt read_length, keyword_length;
-      char keyword[81];
-
-      /* Find the keyword; the keyword plus separator and compression method
-       * bytes can be at most 81 characters long.
-       */
-      read_length = 81; /* maximum */
-      if (read_length > length)
-         read_length = (uInt)length;
-
-      png_crc_read(png_ptr, (png_bytep)keyword, read_length);
-      length -= read_length;
-
-      /* The minimum 'zlib' stream is assumed to be just the 2 byte header,
-       * 5 bytes minimum 'deflate' stream, and the 4 byte checksum.
-       */
-      if (length < 11)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "too short");
-         return;
-      }
-
-      keyword_length = 0;
-      while (keyword_length < 80 && keyword_length < read_length &&
-         keyword[keyword_length] != 0)
-         ++keyword_length;
-
-      /* TODO: make the keyword checking common */
-      if (keyword_length >= 1 && keyword_length <= 79)
-      {
-         /* We only understand '0' compression - deflate - so if we get a
-          * different value we can't safely decode the chunk.
-          */
-         if (keyword_length+1 < read_length &&
-            keyword[keyword_length+1] == PNG_COMPRESSION_TYPE_BASE)
-         {
-            read_length -= keyword_length+2;
-
-            if (png_inflate_claim(png_ptr, png_iCCP) == Z_OK)
-            {
-               Byte profile_header[132]={0};
-               Byte local_buffer[PNG_INFLATE_BUF_SIZE];
-               png_alloc_size_t size = (sizeof profile_header);
-
-               png_ptr->zstream.next_in = (Bytef*)keyword + (keyword_length+2);
-               png_ptr->zstream.avail_in = read_length;
-               (void)png_inflate_read(png_ptr, local_buffer,
-                   (sizeof local_buffer), &length, profile_header, &size,
-                   0/*finish: don't, because the output is too small*/);
-
-               if (size == 0)
-               {
-                  /* We have the ICC profile header; do the basic header checks.
-                   */
-                  png_uint_32 profile_length = png_get_uint_32(profile_header);
-
-                  if (png_icc_check_length(png_ptr, &png_ptr->colorspace,
-                      keyword, profile_length) != 0)
-                  {
-                     /* The length is apparently ok, so we can check the 132
-                      * byte header.
-                      */
-                     if (png_icc_check_header(png_ptr, &png_ptr->colorspace,
-                         keyword, profile_length, profile_header,
-                         png_ptr->color_type) != 0)
-                     {
-                        /* Now read the tag table; a variable size buffer is
-                         * needed at this point, allocate one for the whole
-                         * profile.  The header check has already validated
-                         * that none of this stuff will overflow.
-                         */
-                        png_uint_32 tag_count =
-                           png_get_uint_32(profile_header + 128);
-                        png_bytep profile = png_read_buffer(png_ptr,
-                            profile_length, 2/*silent*/);
-
-                        if (profile != NULL)
-                        {
-                           memcpy(profile, profile_header,
-                               (sizeof profile_header));
-
-                           size = 12 * tag_count;
-
-                           (void)png_inflate_read(png_ptr, local_buffer,
-                               (sizeof local_buffer), &length,
-                               profile + (sizeof profile_header), &size, 0);
-
-                           /* Still expect a buffer error because we expect
-                            * there to be some tag data!
-                            */
-                           if (size == 0)
-                           {
-                              if (png_icc_check_tag_table(png_ptr,
-                                  &png_ptr->colorspace, keyword, profile_length,
-                                  profile) != 0)
-                              {
-                                 /* The profile has been validated for basic
-                                  * security issues, so read the whole thing in.
-                                  */
-                                 size = profile_length - (sizeof profile_header)
-                                     - 12 * tag_count;
-
-                                 (void)png_inflate_read(png_ptr, local_buffer,
-                                     (sizeof local_buffer), &length,
-                                     profile + (sizeof profile_header) +
-                                     12 * tag_count, &size, 1/*finish*/);
-
-                                 if (length > 0 && !(png_ptr->flags &
-                                     PNG_FLAG_BENIGN_ERRORS_WARN))
-                                    errmsg = "extra compressed data";
-
-                                 /* But otherwise allow extra data: */
-                                 else if (size == 0)
-                                 {
-                                    if (length > 0)
-                                    {
-                                       /* This can be handled completely, so
-                                        * keep going.
-                                        */
-                                       png_chunk_warning(png_ptr,
-                                           "extra compressed data");
-                                    }
-
-                                    png_crc_finish(png_ptr, length);
-                                    finished = 1;
-
-# if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0
-                                    /* Check for a match against sRGB */
-                                    png_icc_set_sRGB(png_ptr,
-                                        &png_ptr->colorspace, profile,
-                                        png_ptr->zstream.adler);
-# endif
-
-                                    /* Steal the profile for info_ptr. */
-                                    if (info_ptr != NULL)
-                                    {
-                                       png_free_data(png_ptr, info_ptr,
-                                           PNG_FREE_ICCP, 0);
-
-                                       info_ptr->iccp_name = png_voidcast(char*,
-                                           png_malloc_base(png_ptr,
-                                           keyword_length+1));
-                                       if (info_ptr->iccp_name != NULL)
-                                       {
-                                          memcpy(info_ptr->iccp_name, keyword,
-                                              keyword_length+1);
-                                          info_ptr->iccp_proflen =
-                                              profile_length;
-                                          info_ptr->iccp_profile = profile;
-                                          png_ptr->read_buffer = NULL; /*steal*/
-                                          info_ptr->free_me |= PNG_FREE_ICCP;
-                                          info_ptr->valid |= PNG_INFO_iCCP;
-                                       }
-
-                                       else
-                                       {
-                                          png_ptr->colorspace.flags |=
-                                             PNG_COLORSPACE_INVALID;
-                                          errmsg = "out of memory";
-                                       }
-                                    }
-
-                                    /* else the profile remains in the read
-                                     * buffer which gets reused for subsequent
-                                     * chunks.
-                                     */
-
-                                    if (info_ptr != NULL)
-                                       png_colorspace_sync(png_ptr, info_ptr);
-
-                                    if (errmsg == NULL)
-                                    {
-                                       png_ptr->zowner = 0;
-                                       return;
-                                    }
-                                 }
-                                 if (errmsg == NULL)
-                                    errmsg = png_ptr->zstream.msg;
-                              }
-                              /* else png_icc_check_tag_table output an error */
-                           }
-                           else /* profile truncated */
-                              errmsg = png_ptr->zstream.msg;
-                        }
-
-                        else
-                           errmsg = "out of memory";
-                     }
-
-                     /* else png_icc_check_header output an error */
-                  }
-
-                  /* else png_icc_check_length output an error */
-               }
-
-               else /* profile truncated */
-                  errmsg = png_ptr->zstream.msg;
-
-               /* Release the stream */
-               png_ptr->zowner = 0;
-            }
-
-            else /* png_inflate_claim failed */
-               errmsg = png_ptr->zstream.msg;
-         }
-
-         else
-            errmsg = "bad compression method"; /* or missing */
-      }
-
-      else
-         errmsg = "bad keyword";
-   }
-
-   else
-      errmsg = "too many profiles";
-
-   /* Failure: the reason is in 'errmsg' */
-   if (finished == 0)
-      png_crc_finish(png_ptr, length);
-
-   png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
-   png_colorspace_sync(png_ptr, info_ptr);
-   if (errmsg != NULL) /* else already output */
-      png_chunk_benign_error(png_ptr, errmsg);
-}
-#endif /* READ_iCCP */
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-void /* PRIVATE */
-png_handle_sPLT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-/* Note: this does not properly handle chunks that are > 64K under DOS */
-{
-   png_bytep entry_start, buffer;
-   png_sPLT_t new_palette;
-   png_sPLT_entryp pp;
-   png_uint_32 data_length;
-   int entry_size, i;
-   png_uint_32 skip = 0;
-   png_uint_32 dl;
-   size_t max_dl;
-
-   png_debug(1, "in png_handle_sPLT");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for sPLT");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (length > 65535U)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "too large to fit in memory");
-      return;
-   }
-#endif
-
-   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
-   if (buffer == NULL)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-
-   /* WARNING: this may break if size_t is less than 32 bits; it is assumed
-    * that the PNG_MAX_MALLOC_64K test is enabled in this case, but this is a
-    * potential breakage point if the types in pngconf.h aren't exactly right.
-    */
-   png_crc_read(png_ptr, buffer, length);
-
-   if (png_crc_finish(png_ptr, skip) != 0)
-      return;
-
-   buffer[length] = 0;
-
-   for (entry_start = buffer; *entry_start; entry_start++)
-      /* Empty loop to find end of name */ ;
-
-   ++entry_start;
-
-   /* A sample depth should follow the separator, and we should be on it  */
-   if (length < 2U || entry_start > buffer + (length - 2U))
-   {
-      png_warning(png_ptr, "malformed sPLT chunk");
-      return;
-   }
-
-   new_palette.depth = *entry_start++;
-   entry_size = (new_palette.depth == 8 ? 6 : 10);
-   /* This must fit in a png_uint_32 because it is derived from the original
-    * chunk data length.
-    */
-   data_length = length - (png_uint_32)(entry_start - buffer);
-
-   /* Integrity-check the data length */
-   if ((data_length % (unsigned int)entry_size) != 0)
-   {
-      png_warning(png_ptr, "sPLT chunk has bad length");
-      return;
-   }
-
-   dl = (png_uint_32)(data_length / (unsigned int)entry_size);
-   max_dl = PNG_SIZE_MAX / (sizeof (png_sPLT_entry));
-
-   if (dl > max_dl)
-   {
-      png_warning(png_ptr, "sPLT chunk too long");
-      return;
-   }
-
-   new_palette.nentries = (png_int_32)(data_length / (unsigned int)entry_size);
-
-   new_palette.entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
-       (png_alloc_size_t) new_palette.nentries * (sizeof (png_sPLT_entry)));
-
-   if (new_palette.entries == NULL)
-   {
-      png_warning(png_ptr, "sPLT chunk requires too much memory");
-      return;
-   }
-
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (i = 0; i < new_palette.nentries; i++)
-   {
-      pp = new_palette.entries + i;
-
-      if (new_palette.depth == 8)
-      {
-         pp->red = *entry_start++;
-         pp->green = *entry_start++;
-         pp->blue = *entry_start++;
-         pp->alpha = *entry_start++;
-      }
-
-      else
-      {
-         pp->red   = png_get_uint_16(entry_start); entry_start += 2;
-         pp->green = png_get_uint_16(entry_start); entry_start += 2;
-         pp->blue  = png_get_uint_16(entry_start); entry_start += 2;
-         pp->alpha = png_get_uint_16(entry_start); entry_start += 2;
-      }
-
-      pp->frequency = png_get_uint_16(entry_start); entry_start += 2;
-   }
-#else
-   pp = new_palette.entries;
-
-   for (i = 0; i < new_palette.nentries; i++)
-   {
-
-      if (new_palette.depth == 8)
-      {
-         pp[i].red   = *entry_start++;
-         pp[i].green = *entry_start++;
-         pp[i].blue  = *entry_start++;
-         pp[i].alpha = *entry_start++;
-      }
-
-      else
-      {
-         pp[i].red   = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].green = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].blue  = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2;
-      }
-
-      pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2;
-   }
-#endif
-
-   /* Discard all chunk data except the name and stash that */
-   new_palette.name = (png_charp)buffer;
-
-   png_set_sPLT(png_ptr, info_ptr, &new_palette, 1);
-
-   png_free(png_ptr, new_palette.entries);
-}
-#endif /* READ_sPLT */
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-void /* PRIVATE */
-png_handle_tRNS(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte readbuf[PNG_MAX_PALETTE_LENGTH];
-
-   png_debug(1, "in png_handle_tRNS");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      png_byte buf[2];
-
-      if (length != 2)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "invalid");
-         return;
-      }
-
-      png_crc_read(png_ptr, buf, 2);
-      png_ptr->num_trans = 1;
-      png_ptr->trans_color.gray = png_get_uint_16(buf);
-   }
-
-   else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
-   {
-      png_byte buf[6];
-
-      if (length != 6)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "invalid");
-         return;
-      }
-
-      png_crc_read(png_ptr, buf, length);
-      png_ptr->num_trans = 1;
-      png_ptr->trans_color.red = png_get_uint_16(buf);
-      png_ptr->trans_color.green = png_get_uint_16(buf + 2);
-      png_ptr->trans_color.blue = png_get_uint_16(buf + 4);
-   }
-
-   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if ((png_ptr->mode & PNG_HAVE_PLTE) == 0)
-      {
-         /* TODO: is this actually an error in the ISO spec? */
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "out of place");
-         return;
-      }
-
-      if (length > (unsigned int) png_ptr->num_palette ||
-         length > (unsigned int) PNG_MAX_PALETTE_LENGTH ||
-         length == 0)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "invalid");
-         return;
-      }
-
-      png_crc_read(png_ptr, readbuf, length);
-      png_ptr->num_trans = (png_uint_16)length;
-   }
-
-   else
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid with alpha channel");
-      return;
-   }
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-   {
-      png_ptr->num_trans = 0;
-      return;
-   }
-
-   /* TODO: this is a horrible side effect in the palette case because the
-    * png_struct ends up with a pointer to the tRNS buffer owned by the
-    * png_info.  Fix this.
-    */
-   png_set_tRNS(png_ptr, info_ptr, readbuf, png_ptr->num_trans,
-       &(png_ptr->trans_color));
-}
-#endif
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-void /* PRIVATE */
-png_handle_bKGD(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   unsigned int truelen;
-   png_byte buf[6];
-   png_color_16 background;
-
-   png_debug(1, "in png_handle_bKGD");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 ||
-       (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-       (png_ptr->mode & PNG_HAVE_PLTE) == 0))
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      truelen = 1;
-
-   else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      truelen = 6;
-
-   else
-      truelen = 2;
-
-   if (length != truelen)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, truelen);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   /* We convert the index value into RGB components so that we can allow
-    * arbitrary RGB values for background when we have transparency, and
-    * so it is easy to determine the RGB values of the background color
-    * from the info_ptr struct.
-    */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      background.index = buf[0];
-
-      if (info_ptr != NULL && info_ptr->num_palette != 0)
-      {
-         if (buf[0] >= info_ptr->num_palette)
-         {
-            png_chunk_benign_error(png_ptr, "invalid index");
-            return;
-         }
-
-         background.red = (png_uint_16)png_ptr->palette[buf[0]].red;
-         background.green = (png_uint_16)png_ptr->palette[buf[0]].green;
-         background.blue = (png_uint_16)png_ptr->palette[buf[0]].blue;
-      }
-
-      else
-         background.red = background.green = background.blue = 0;
-
-      background.gray = 0;
-   }
-
-   else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) /* GRAY */
-   {
-      if (png_ptr->bit_depth <= 8)
-      {
-         if (buf[0] != 0 || buf[1] >= (unsigned int)(1 << png_ptr->bit_depth))
-         {
-            png_chunk_benign_error(png_ptr, "invalid gray level");
-            return;
-         }
-      }
-
-      background.index = 0;
-      background.red =
-      background.green =
-      background.blue =
-      background.gray = png_get_uint_16(buf);
-   }
-
-   else
-   {
-      if (png_ptr->bit_depth <= 8)
-      {
-         if (buf[0] != 0 || buf[2] != 0 || buf[4] != 0)
-         {
-            png_chunk_benign_error(png_ptr, "invalid color");
-            return;
-         }
-      }
-
-      background.index = 0;
-      background.red = png_get_uint_16(buf);
-      background.green = png_get_uint_16(buf + 2);
-      background.blue = png_get_uint_16(buf + 4);
-      background.gray = 0;
-   }
-
-   png_set_bKGD(png_ptr, info_ptr, &background);
-}
-#endif
-
-#ifdef PNG_READ_eXIf_SUPPORTED
-void /* PRIVATE */
-png_handle_eXIf(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   unsigned int i;
-
-   png_debug(1, "in png_handle_eXIf");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   if (length < 2)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "too short");
-      return;
-   }
-
-   else if (info_ptr == NULL || (info_ptr->valid & PNG_INFO_eXIf) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   info_ptr->free_me |= PNG_FREE_EXIF;
-
-   info_ptr->eXIf_buf = png_voidcast(png_bytep,
-             png_malloc_warn(png_ptr, length));
-
-   if (info_ptr->eXIf_buf == NULL)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   for (i = 0; i < length; i++)
-   {
-      png_byte buf[1];
-      png_crc_read(png_ptr, buf, 1);
-      info_ptr->eXIf_buf[i] = buf[0];
-      if (i == 1)
-      {
-         if ((buf[0] != 'M' && buf[0] != 'I') ||
-             (info_ptr->eXIf_buf[0] != buf[0]))
-         {
-            png_crc_finish(png_ptr, length - 2);
-            png_chunk_benign_error(png_ptr, "incorrect byte-order specifier");
-            png_free(png_ptr, info_ptr->eXIf_buf);
-            info_ptr->eXIf_buf = NULL;
-            return;
-         }
-      }
-   }
-
-   if (png_crc_finish(png_ptr, 0) == 0)
-      png_set_eXIf_1(png_ptr, info_ptr, length, info_ptr->eXIf_buf);
-
-   png_free(png_ptr, info_ptr->eXIf_buf);
-   info_ptr->eXIf_buf = NULL;
-}
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-void /* PRIVATE */
-png_handle_hIST(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   unsigned int num, i;
-   png_uint_16 readbuf[PNG_MAX_PALETTE_LENGTH];
-
-   png_debug(1, "in png_handle_hIST");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 ||
-       (png_ptr->mode & PNG_HAVE_PLTE) == 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   num = length / 2 ;
-
-   if (length != num * 2 ||
-       num != (unsigned int)png_ptr->num_palette ||
-       num > (unsigned int)PNG_MAX_PALETTE_LENGTH)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   for (i = 0; i < num; i++)
-   {
-      png_byte buf[2];
-
-      png_crc_read(png_ptr, buf, 2);
-      readbuf[i] = png_get_uint_16(buf);
-   }
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   png_set_hIST(png_ptr, info_ptr, readbuf);
-}
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-void /* PRIVATE */
-png_handle_pHYs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte buf[9];
-   png_uint_32 res_x, res_y;
-   int unit_type;
-
-   png_debug(1, "in png_handle_pHYs");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if (length != 9)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 9);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   res_x = png_get_uint_32(buf);
-   res_y = png_get_uint_32(buf + 4);
-   unit_type = buf[8];
-   png_set_pHYs(png_ptr, info_ptr, res_x, res_y, unit_type);
-}
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-void /* PRIVATE */
-png_handle_oFFs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte buf[9];
-   png_int_32 offset_x, offset_y;
-   int unit_type;
-
-   png_debug(1, "in png_handle_oFFs");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if (length != 9)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 9);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   offset_x = png_get_int_32(buf);
-   offset_y = png_get_int_32(buf + 4);
-   unit_type = buf[8];
-   png_set_oFFs(png_ptr, info_ptr, offset_x, offset_y, unit_type);
-}
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-/* Read the pCAL chunk (described in the PNG Extensions document) */
-void /* PRIVATE */
-png_handle_pCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_int_32 X0, X1;
-   png_byte type, nparams;
-   png_bytep buffer, buf, units, endptr;
-   png_charpp params;
-   int i;
-
-   png_debug(1, "in png_handle_pCAL");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   png_debug1(2, "Allocating and reading pCAL chunk data (%u bytes)",
-       length + 1);
-
-   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
-
-   if (buffer == NULL)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   png_crc_read(png_ptr, buffer, length);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   buffer[length] = 0; /* Null terminate the last string */
-
-   png_debug(3, "Finding end of pCAL purpose string");
-   for (buf = buffer; *buf; buf++)
-      /* Empty loop */ ;
-
-   endptr = buffer + length;
-
-   /* We need to have at least 12 bytes after the purpose string
-    * in order to get the parameter information.
-    */
-   if (endptr - buf <= 12)
-   {
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_debug(3, "Reading pCAL X0, X1, type, nparams, and units");
-   X0 = png_get_int_32((png_bytep)buf+1);
-   X1 = png_get_int_32((png_bytep)buf+5);
-   type = buf[9];
-   nparams = buf[10];
-   units = buf + 11;
-
-   png_debug(3, "Checking pCAL equation type and number of parameters");
-   /* Check that we have the right number of parameters for known
-    * equation types.
-    */
-   if ((type == PNG_EQUATION_LINEAR && nparams != 2) ||
-       (type == PNG_EQUATION_BASE_E && nparams != 3) ||
-       (type == PNG_EQUATION_ARBITRARY && nparams != 3) ||
-       (type == PNG_EQUATION_HYPERBOLIC && nparams != 4))
-   {
-      png_chunk_benign_error(png_ptr, "invalid parameter count");
-      return;
-   }
-
-   else if (type >= PNG_EQUATION_LAST)
-   {
-      png_chunk_benign_error(png_ptr, "unrecognized equation type");
-   }
-
-   for (buf = units; *buf; buf++)
-      /* Empty loop to move past the units string. */ ;
-
-   png_debug(3, "Allocating pCAL parameters array");
-
-   params = png_voidcast(png_charpp, png_malloc_warn(png_ptr,
-       nparams * (sizeof (png_charp))));
-
-   if (params == NULL)
-   {
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   /* Get pointers to the start of each parameter string. */
-   for (i = 0; i < nparams; i++)
-   {
-      buf++; /* Skip the null string terminator from previous parameter. */
-
-      png_debug1(3, "Reading pCAL parameter %d", i);
-
-      for (params[i] = (png_charp)buf; buf <= endptr && *buf != 0; buf++)
-         /* Empty loop to move past each parameter string */ ;
-
-      /* Make sure we haven't run out of data yet */
-      if (buf > endptr)
-      {
-         png_free(png_ptr, params);
-         png_chunk_benign_error(png_ptr, "invalid data");
-         return;
-      }
-   }
-
-   png_set_pCAL(png_ptr, info_ptr, (png_charp)buffer, X0, X1, type, nparams,
-       (png_charp)units, params);
-
-   png_free(png_ptr, params);
-}
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-/* Read the sCAL chunk */
-void /* PRIVATE */
-png_handle_sCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_bytep buffer;
-   size_t i;
-   int state;
-
-   png_debug(1, "in png_handle_sCAL");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of place");
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   /* Need unit type, width, \0, height: minimum 4 bytes */
-   else if (length < 4)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_debug1(2, "Allocating and reading sCAL chunk data (%u bytes)",
-       length + 1);
-
-   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
-
-   if (buffer == NULL)
-   {
-      png_chunk_benign_error(png_ptr, "out of memory");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buffer, length);
-   buffer[length] = 0; /* Null terminate the last string */
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   /* Validate the unit. */
-   if (buffer[0] != 1 && buffer[0] != 2)
-   {
-      png_chunk_benign_error(png_ptr, "invalid unit");
-      return;
-   }
-
-   /* Validate the ASCII numbers, need two ASCII numbers separated by
-    * a '\0' and they need to fit exactly in the chunk data.
-    */
-   i = 1;
-   state = 0;
-
-   if (png_check_fp_number((png_const_charp)buffer, length, &state, &i) == 0 ||
-       i >= length || buffer[i++] != 0)
-      png_chunk_benign_error(png_ptr, "bad width format");
-
-   else if (PNG_FP_IS_POSITIVE(state) == 0)
-      png_chunk_benign_error(png_ptr, "non-positive width");
-
-   else
-   {
-      size_t heighti = i;
-
-      state = 0;
-      if (png_check_fp_number((png_const_charp)buffer, length,
-          &state, &i) == 0 || i != length)
-         png_chunk_benign_error(png_ptr, "bad height format");
-
-      else if (PNG_FP_IS_POSITIVE(state) == 0)
-         png_chunk_benign_error(png_ptr, "non-positive height");
-
-      else
-         /* This is the (only) success case. */
-         png_set_sCAL_s(png_ptr, info_ptr, buffer[0],
-             (png_charp)buffer+1, (png_charp)buffer+heighti);
-   }
-}
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-void /* PRIVATE */
-png_handle_tIME(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_byte buf[7];
-   png_time mod_time;
-
-   png_debug(1, "in png_handle_tIME");
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME) != 0)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "duplicate");
-      return;
-   }
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-   if (length != 7)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "invalid");
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 7);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   mod_time.second = buf[6];
-   mod_time.minute = buf[5];
-   mod_time.hour = buf[4];
-   mod_time.day = buf[3];
-   mod_time.month = buf[2];
-   mod_time.year = png_get_uint_16(buf);
-
-   png_set_tIME(png_ptr, info_ptr, &mod_time);
-}
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-/* Note: this does not properly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_tEXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_text  text_info;
-   png_bytep buffer;
-   png_charp key;
-   png_charp text;
-   png_uint_32 skip = 0;
-
-   png_debug(1, "in png_handle_tEXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "no space in chunk cache");
-         return;
-      }
-   }
-#endif
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (length > 65535U)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "too large to fit in memory");
-      return;
-   }
-#endif
-
-   buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/);
-
-   if (buffer == NULL)
-   {
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   png_crc_read(png_ptr, buffer, length);
-
-   if (png_crc_finish(png_ptr, skip) != 0)
-      return;
-
-   key = (png_charp)buffer;
-   key[length] = 0;
-
-   for (text = key; *text; text++)
-      /* Empty loop to find end of key */ ;
-
-   if (text != key + length)
-      text++;
-
-   text_info.compression = PNG_TEXT_COMPRESSION_NONE;
-   text_info.key = key;
-   text_info.lang = NULL;
-   text_info.lang_key = NULL;
-   text_info.itxt_length = 0;
-   text_info.text = text;
-   text_info.text_length = strlen(text);
-
-   if (png_set_text_2(png_ptr, info_ptr, &text_info, 1) != 0)
-      png_warning(png_ptr, "Insufficient memory to process text chunk");
-}
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-/* Note: this does not correctly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_zTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_const_charp errmsg = NULL;
-   png_bytep       buffer;
-   png_uint_32     keyword_length;
-
-   png_debug(1, "in png_handle_zTXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "no space in chunk cache");
-         return;
-      }
-   }
-#endif
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-   /* Note, "length" is sufficient here; we won't be adding
-    * a null terminator later.
-    */
-   buffer = png_read_buffer(png_ptr, length, 2/*silent*/);
-
-   if (buffer == NULL)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   png_crc_read(png_ptr, buffer, length);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   /* TODO: also check that the keyword contents match the spec! */
-   for (keyword_length = 0;
-      keyword_length < length && buffer[keyword_length] != 0;
-      ++keyword_length)
-      /* Empty loop to find end of name */ ;
-
-   if (keyword_length > 79 || keyword_length < 1)
-      errmsg = "bad keyword";
-
-   /* zTXt must have some LZ data after the keyword, although it may expand to
-    * zero bytes; we need a '\0' at the end of the keyword, the compression type
-    * then the LZ data:
-    */
-   else if (keyword_length + 3 > length)
-      errmsg = "truncated";
-
-   else if (buffer[keyword_length+1] != PNG_COMPRESSION_TYPE_BASE)
-      errmsg = "unknown compression type";
-
-   else
-   {
-      png_alloc_size_t uncompressed_length = PNG_SIZE_MAX;
-
-      /* TODO: at present png_decompress_chunk imposes a single application
-       * level memory limit, this should be split to different values for iCCP
-       * and text chunks.
-       */
-      if (png_decompress_chunk(png_ptr, length, keyword_length+2,
-          &uncompressed_length, 1/*terminate*/) == Z_STREAM_END)
-      {
-         png_text text;
-
-         if (png_ptr->read_buffer == NULL)
-           errmsg="Read failure in png_handle_zTXt";
-         else
-         {
-            /* It worked; png_ptr->read_buffer now looks like a tEXt chunk
-             * except for the extra compression type byte and the fact that
-             * it isn't necessarily '\0' terminated.
-             */
-            buffer = png_ptr->read_buffer;
-            buffer[uncompressed_length+(keyword_length+2)] = 0;
-
-            text.compression = PNG_TEXT_COMPRESSION_zTXt;
-            text.key = (png_charp)buffer;
-            text.text = (png_charp)(buffer + keyword_length+2);
-            text.text_length = uncompressed_length;
-            text.itxt_length = 0;
-            text.lang = NULL;
-            text.lang_key = NULL;
-
-            if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0)
-               errmsg = "insufficient memory";
-         }
-      }
-
-      else
-         errmsg = png_ptr->zstream.msg;
-   }
-
-   if (errmsg != NULL)
-      png_chunk_benign_error(png_ptr, errmsg);
-}
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-/* Note: this does not correctly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_iTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
-{
-   png_const_charp errmsg = NULL;
-   png_bytep buffer;
-   png_uint_32 prefix_length;
-
-   png_debug(1, "in png_handle_iTXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         png_chunk_benign_error(png_ptr, "no space in chunk cache");
-         return;
-      }
-   }
-#endif
-
-   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
-      png_chunk_error(png_ptr, "missing IHDR");
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-   buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/);
-
-   if (buffer == NULL)
-   {
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "out of memory");
-      return;
-   }
-
-   png_crc_read(png_ptr, buffer, length);
-
-   if (png_crc_finish(png_ptr, 0) != 0)
-      return;
-
-   /* First the keyword. */
-   for (prefix_length=0;
-      prefix_length < length && buffer[prefix_length] != 0;
-      ++prefix_length)
-      /* Empty loop */ ;
-
-   /* Perform a basic check on the keyword length here. */
-   if (prefix_length > 79 || prefix_length < 1)
-      errmsg = "bad keyword";
-
-   /* Expect keyword, compression flag, compression type, language, translated
-    * keyword (both may be empty but are 0 terminated) then the text, which may
-    * be empty.
-    */
-   else if (prefix_length + 5 > length)
-      errmsg = "truncated";
-
-   else if (buffer[prefix_length+1] == 0 ||
-      (buffer[prefix_length+1] == 1 &&
-      buffer[prefix_length+2] == PNG_COMPRESSION_TYPE_BASE))
-   {
-      int compressed = buffer[prefix_length+1] != 0;
-      png_uint_32 language_offset, translated_keyword_offset;
-      png_alloc_size_t uncompressed_length = 0;
-
-      /* Now the language tag */
-      prefix_length += 3;
-      language_offset = prefix_length;
-
-      for (; prefix_length < length && buffer[prefix_length] != 0;
-         ++prefix_length)
-         /* Empty loop */ ;
-
-      /* WARNING: the length may be invalid here, this is checked below. */
-      translated_keyword_offset = ++prefix_length;
-
-      for (; prefix_length < length && buffer[prefix_length] != 0;
-         ++prefix_length)
-         /* Empty loop */ ;
-
-      /* prefix_length should now be at the trailing '\0' of the translated
-       * keyword, but it may already be over the end.  None of this arithmetic
-       * can overflow because chunks are at most 2^31 bytes long, but on 16-bit
-       * systems the available allocation may overflow.
-       */
-      ++prefix_length;
-
-      if (compressed == 0 && prefix_length <= length)
-         uncompressed_length = length - prefix_length;
-
-      else if (compressed != 0 && prefix_length < length)
-      {
-         uncompressed_length = PNG_SIZE_MAX;
-
-         /* TODO: at present png_decompress_chunk imposes a single application
-          * level memory limit, this should be split to different values for
-          * iCCP and text chunks.
-          */
-         if (png_decompress_chunk(png_ptr, length, prefix_length,
-             &uncompressed_length, 1/*terminate*/) == Z_STREAM_END)
-            buffer = png_ptr->read_buffer;
-
-         else
-            errmsg = png_ptr->zstream.msg;
-      }
-
-      else
-         errmsg = "truncated";
-
-      if (errmsg == NULL)
-      {
-         png_text text;
-
-         buffer[uncompressed_length+prefix_length] = 0;
-
-         if (compressed == 0)
-            text.compression = PNG_ITXT_COMPRESSION_NONE;
-
-         else
-            text.compression = PNG_ITXT_COMPRESSION_zTXt;
-
-         text.key = (png_charp)buffer;
-         text.lang = (png_charp)buffer + language_offset;
-         text.lang_key = (png_charp)buffer + translated_keyword_offset;
-         text.text = (png_charp)buffer + prefix_length;
-         text.text_length = 0;
-         text.itxt_length = uncompressed_length;
-
-         if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0)
-            errmsg = "insufficient memory";
-      }
-   }
-
-   else
-      errmsg = "bad compression info";
-
-   if (errmsg != NULL)
-      png_chunk_benign_error(png_ptr, errmsg);
-}
-#endif
-
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-/* Utility function for png_handle_unknown; set up png_ptr::unknown_chunk */
-static int
-png_cache_unknown_chunk(png_structrp png_ptr, png_uint_32 length)
-{
-   png_alloc_size_t limit = PNG_SIZE_MAX;
-
-   if (png_ptr->unknown_chunk.data != NULL)
-   {
-      png_free(png_ptr, png_ptr->unknown_chunk.data);
-      png_ptr->unknown_chunk.data = NULL;
-   }
-
-#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_malloc_max > 0 &&
-       png_ptr->user_chunk_malloc_max < limit)
-      limit = png_ptr->user_chunk_malloc_max;
-
-#  elif PNG_USER_CHUNK_MALLOC_MAX > 0
-   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
-      limit = PNG_USER_CHUNK_MALLOC_MAX;
-#  endif
-
-   if (length <= limit)
-   {
-      PNG_CSTRING_FROM_CHUNK(png_ptr->unknown_chunk.name, png_ptr->chunk_name);
-      /* The following is safe because of the PNG_SIZE_MAX init above */
-      png_ptr->unknown_chunk.size = (size_t)length/*SAFE*/;
-      /* 'mode' is a flag array, only the bottom four bits matter here */
-      png_ptr->unknown_chunk.location = (png_byte)png_ptr->mode/*SAFE*/;
-
-      if (length == 0)
-         png_ptr->unknown_chunk.data = NULL;
-
-      else
-      {
-         /* Do a 'warn' here - it is handled below. */
-         png_ptr->unknown_chunk.data = png_voidcast(png_bytep,
-             png_malloc_warn(png_ptr, length));
-      }
-   }
-
-   if (png_ptr->unknown_chunk.data == NULL && length > 0)
-   {
-      /* This is benign because we clean up correctly */
-      png_crc_finish(png_ptr, length);
-      png_chunk_benign_error(png_ptr, "unknown chunk exceeds memory limits");
-      return 0;
-   }
-
-   else
-   {
-      if (length > 0)
-         png_crc_read(png_ptr, png_ptr->unknown_chunk.data, length);
-      png_crc_finish(png_ptr, 0);
-      return 1;
-   }
-}
-#endif /* READ_UNKNOWN_CHUNKS */
-
-/* Handle an unknown, or known but disabled, chunk */
-void /* PRIVATE */
-png_handle_unknown(png_structrp png_ptr, png_inforp info_ptr,
-    png_uint_32 length, int keep)
-{
-   int handled = 0; /* the chunk was handled */
-
-   png_debug(1, "in png_handle_unknown");
-
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-   /* NOTE: this code is based on the code in libpng-1.4.12 except for fixing
-    * the bug which meant that setting a non-default behavior for a specific
-    * chunk would be ignored (the default was always used unless a user
-    * callback was installed).
-    *
-    * 'keep' is the value from the png_chunk_unknown_handling, the setting for
-    * this specific chunk_name, if PNG_HANDLE_AS_UNKNOWN_SUPPORTED, if not it
-    * will always be PNG_HANDLE_CHUNK_AS_DEFAULT and it needs to be set here.
-    * This is just an optimization to avoid multiple calls to the lookup
-    * function.
-    */
-#  ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-#     ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-   keep = png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name);
-#     endif
-#  endif
-
-   /* One of the following methods will read the chunk or skip it (at least one
-    * of these is always defined because this is the only way to switch on
-    * PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
-    */
-#  ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-   /* The user callback takes precedence over the chunk keep value, but the
-    * keep value is still required to validate a save of a critical chunk.
-    */
-   if (png_ptr->read_user_chunk_fn != NULL)
-   {
-      if (png_cache_unknown_chunk(png_ptr, length) != 0)
-      {
-         /* Callback to user unknown chunk handler */
-         int ret = (*(png_ptr->read_user_chunk_fn))(png_ptr,
-             &png_ptr->unknown_chunk);
-
-         /* ret is:
-          * negative: An error occurred; png_chunk_error will be called.
-          *     zero: The chunk was not handled, the chunk will be discarded
-          *           unless png_set_keep_unknown_chunks has been used to set
-          *           a 'keep' behavior for this particular chunk, in which
-          *           case that will be used.  A critical chunk will cause an
-          *           error at this point unless it is to be saved.
-          * positive: The chunk was handled, libpng will ignore/discard it.
-          */
-         if (ret < 0)
-            png_chunk_error(png_ptr, "error in user chunk");
-
-         else if (ret == 0)
-         {
-            /* If the keep value is 'default' or 'never' override it, but
-             * still error out on critical chunks unless the keep value is
-             * 'always'  While this is weird it is the behavior in 1.4.12.
-             * A possible improvement would be to obey the value set for the
-             * chunk, but this would be an API change that would probably
-             * damage some applications.
-             *
-             * The png_app_warning below catches the case that matters, where
-             * the application has not set specific save or ignore for this
-             * chunk or global save or ignore.
-             */
-            if (keep < PNG_HANDLE_CHUNK_IF_SAFE)
-            {
-#              ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-               if (png_ptr->unknown_default < PNG_HANDLE_CHUNK_IF_SAFE)
-               {
-                  png_chunk_warning(png_ptr, "Saving unknown chunk:");
-                  png_app_warning(png_ptr,
-                      "forcing save of an unhandled chunk;"
-                      " please call png_set_keep_unknown_chunks");
-                      /* with keep = PNG_HANDLE_CHUNK_IF_SAFE */
-               }
-#              endif
-               keep = PNG_HANDLE_CHUNK_IF_SAFE;
-            }
-         }
-
-         else /* chunk was handled */
-         {
-            handled = 1;
-            /* Critical chunks can be safely discarded at this point. */
-            keep = PNG_HANDLE_CHUNK_NEVER;
-         }
-      }
-
-      else
-         keep = PNG_HANDLE_CHUNK_NEVER; /* insufficient memory */
-   }
-
-   else
-   /* Use the SAVE_UNKNOWN_CHUNKS code or skip the chunk */
-#  endif /* READ_USER_CHUNKS */
-
-#  ifdef PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED
-   {
-      /* keep is currently just the per-chunk setting, if there was no
-       * setting change it to the global default now (not that this may
-       * still be AS_DEFAULT) then obtain the cache of the chunk if required,
-       * if not simply skip the chunk.
-       */
-      if (keep == PNG_HANDLE_CHUNK_AS_DEFAULT)
-         keep = png_ptr->unknown_default;
-
-      if (keep == PNG_HANDLE_CHUNK_ALWAYS ||
-         (keep == PNG_HANDLE_CHUNK_IF_SAFE &&
-          PNG_CHUNK_ANCILLARY(png_ptr->chunk_name)))
-      {
-         if (png_cache_unknown_chunk(png_ptr, length) == 0)
-            keep = PNG_HANDLE_CHUNK_NEVER;
-      }
-
-      else
-         png_crc_finish(png_ptr, length);
-   }
-#  else
-#     ifndef PNG_READ_USER_CHUNKS_SUPPORTED
-#        error no method to support READ_UNKNOWN_CHUNKS
-#     endif
-
-   {
-      /* If here there is no read callback pointer set and no support is
-       * compiled in to just save the unknown chunks, so simply skip this
-       * chunk.  If 'keep' is something other than AS_DEFAULT or NEVER then
-       * the app has erroneously asked for unknown chunk saving when there
-       * is no support.
-       */
-      if (keep > PNG_HANDLE_CHUNK_NEVER)
-         png_app_error(png_ptr, "no unknown chunk support available");
-
-      png_crc_finish(png_ptr, length);
-   }
-#  endif
-
-#  ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-   /* Now store the chunk in the chunk list if appropriate, and if the limits
-    * permit it.
-    */
-   if (keep == PNG_HANDLE_CHUNK_ALWAYS ||
-      (keep == PNG_HANDLE_CHUNK_IF_SAFE &&
-       PNG_CHUNK_ANCILLARY(png_ptr->chunk_name)))
-   {
-#     ifdef PNG_USER_LIMITS_SUPPORTED
-      switch (png_ptr->user_chunk_cache_max)
-      {
-         case 2:
-            png_ptr->user_chunk_cache_max = 1;
-            png_chunk_benign_error(png_ptr, "no space in chunk cache");
-            /* FALLTHROUGH */
-         case 1:
-            /* NOTE: prior to 1.6.0 this case resulted in an unknown critical
-             * chunk being skipped, now there will be a hard error below.
-             */
-            break;
-
-         default: /* not at limit */
-            --(png_ptr->user_chunk_cache_max);
-            /* FALLTHROUGH */
-         case 0: /* no limit */
-#  endif /* USER_LIMITS */
-            /* Here when the limit isn't reached or when limits are compiled
-             * out; store the chunk.
-             */
-            png_set_unknown_chunks(png_ptr, info_ptr,
-                &png_ptr->unknown_chunk, 1);
-            handled = 1;
-#  ifdef PNG_USER_LIMITS_SUPPORTED
-            break;
-      }
-#  endif
-   }
-#  else /* no store support: the chunk must be handled by the user callback */
-   PNG_UNUSED(info_ptr)
-#  endif
-
-   /* Regardless of the error handling below the cached data (if any) can be
-    * freed now.  Notice that the data is not freed if there is a png_error, but
-    * it will be freed by destroy_read_struct.
-    */
-   if (png_ptr->unknown_chunk.data != NULL)
-      png_free(png_ptr, png_ptr->unknown_chunk.data);
-   png_ptr->unknown_chunk.data = NULL;
-
-#else /* !PNG_READ_UNKNOWN_CHUNKS_SUPPORTED */
-   /* There is no support to read an unknown chunk, so just skip it. */
-   png_crc_finish(png_ptr, length);
-   PNG_UNUSED(info_ptr)
-   PNG_UNUSED(keep)
-#endif /* !READ_UNKNOWN_CHUNKS */
-
-   /* Check for unhandled critical chunks */
-   if (handled == 0 && PNG_CHUNK_CRITICAL(png_ptr->chunk_name))
-      png_chunk_error(png_ptr, "unhandled critical chunk");
-}
-
-/* This function is called to verify that a chunk name is valid.
- * This function can't have the "critical chunk check" incorporated
- * into it, since in the future we will need to be able to call user
- * functions to handle unknown critical chunks after we check that
- * the chunk name itself is valid.
- */
-
-/* Bit hacking: the test for an invalid byte in the 4 byte chunk name is:
- *
- * ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
- */
-
-void /* PRIVATE */
-png_check_chunk_name(png_const_structrp png_ptr, png_uint_32 chunk_name)
-{
-   int i;
-   png_uint_32 cn=chunk_name;
-
-   png_debug(1, "in png_check_chunk_name");
-
-   for (i=1; i<=4; ++i)
-   {
-      int c = cn & 0xff;
-
-      if (c < 65 || c > 122 || (c > 90 && c < 97))
-         png_chunk_error(png_ptr, "invalid chunk type");
-
-      cn >>= 8;
-   }
-}
-
-void /* PRIVATE */
-png_check_chunk_length(png_const_structrp png_ptr, png_uint_32 length)
-{
-   png_alloc_size_t limit = PNG_UINT_31_MAX;
-
-# ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_malloc_max > 0 &&
-       png_ptr->user_chunk_malloc_max < limit)
-      limit = png_ptr->user_chunk_malloc_max;
-# elif PNG_USER_CHUNK_MALLOC_MAX > 0
-   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
-      limit = PNG_USER_CHUNK_MALLOC_MAX;
-# endif
-   if (png_ptr->chunk_name == png_IDAT)
-   {
-      png_alloc_size_t idat_limit = PNG_UINT_31_MAX;
-      size_t row_factor =
-         (size_t)png_ptr->width
-         * (size_t)png_ptr->channels
-         * (png_ptr->bit_depth > 8? 2: 1)
-         + 1
-         + (png_ptr->interlaced? 6: 0);
-      if (png_ptr->height > PNG_UINT_32_MAX/row_factor)
-         idat_limit = PNG_UINT_31_MAX;
-      else
-         idat_limit = png_ptr->height * row_factor;
-      row_factor = row_factor > 32566? 32566 : row_factor;
-      idat_limit += 6 + 5*(idat_limit/row_factor+1); /* zlib+deflate overhead */
-      idat_limit=idat_limit < PNG_UINT_31_MAX? idat_limit : PNG_UINT_31_MAX;
-      limit = limit < idat_limit? idat_limit : limit;
-   }
-
-   if (length > limit)
-   {
-      png_debug2(0," length = %lu, limit = %lu",
-         (unsigned long)length,(unsigned long)limit);
-      png_benign_error(png_ptr, "chunk data is too large");
-   }
-}
-
-/* Combines the row recently read in with the existing pixels in the row.  This
- * routine takes care of alpha and transparency if requested.  This routine also
- * handles the two methods of progressive display of interlaced images,
- * depending on the 'display' value; if 'display' is true then the whole row
- * (dp) is filled from the start by replicating the available pixels.  If
- * 'display' is false only those pixels present in the pass are filled in.
- */
-void /* PRIVATE */
-png_combine_row(png_const_structrp png_ptr, png_bytep dp, int display)
-{
-   unsigned int pixel_depth = png_ptr->transformed_pixel_depth;
-   png_const_bytep sp = png_ptr->row_buf + 1;
-   png_alloc_size_t row_width = png_ptr->width;
-   unsigned int pass = png_ptr->pass;
-   png_bytep end_ptr = 0;
-   png_byte end_byte = 0;
-   unsigned int end_mask;
-
-   png_debug(1, "in png_combine_row");
-
-   /* Added in 1.5.6: it should not be possible to enter this routine until at
-    * least one row has been read from the PNG data and transformed.
-    */
-   if (pixel_depth == 0)
-      png_error(png_ptr, "internal row logic error");
-
-   /* Added in 1.5.4: the pixel depth should match the information returned by
-    * any call to png_read_update_info at this point.  Do not continue if we got
-    * this wrong.
-    */
-   if (png_ptr->info_rowbytes != 0 && png_ptr->info_rowbytes !=
-          PNG_ROWBYTES(pixel_depth, row_width))
-      png_error(png_ptr, "internal row size calculation error");
-
-   /* Don't expect this to ever happen: */
-   if (row_width == 0)
-      png_error(png_ptr, "internal row width error");
-
-   /* Preserve the last byte in cases where only part of it will be overwritten,
-    * the multiply below may overflow, we don't care because ANSI-C guarantees
-    * we get the low bits.
-    */
-   end_mask = (pixel_depth * row_width) & 7;
-   if (end_mask != 0)
-   {
-      /* end_ptr == NULL is a flag to say do nothing */
-      end_ptr = dp + PNG_ROWBYTES(pixel_depth, row_width) - 1;
-      end_byte = *end_ptr;
-#     ifdef PNG_READ_PACKSWAP_SUPPORTED
-      if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-         /* little-endian byte */
-         end_mask = (unsigned int)(0xff << end_mask);
-
-      else /* big-endian byte */
-#     endif
-      end_mask = 0xff >> end_mask;
-      /* end_mask is now the bits to *keep* from the destination row */
-   }
-
-   /* For non-interlaced images this reduces to a memcpy(). A memcpy()
-    * will also happen if interlacing isn't supported or if the application
-    * does not call png_set_interlace_handling().  In the latter cases the
-    * caller just gets a sequence of the unexpanded rows from each interlace
-    * pass.
-    */
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced != 0 &&
-       (png_ptr->transformations & PNG_INTERLACE) != 0 &&
-       pass < 6 && (display == 0 ||
-       /* The following copies everything for 'display' on passes 0, 2 and 4. */
-       (display == 1 && (pass & 1) != 0)))
-   {
-      /* Narrow images may have no bits in a pass; the caller should handle
-       * this, but this test is cheap:
-       */
-      if (row_width <= PNG_PASS_START_COL(pass))
-         return;
-
-      if (pixel_depth < 8)
-      {
-         /* For pixel depths up to 4 bpp the 8-pixel mask can be expanded to fit
-          * into 32 bits, then a single loop over the bytes using the four byte
-          * values in the 32-bit mask can be used.  For the 'display' option the
-          * expanded mask may also not require any masking within a byte.  To
-          * make this work the PACKSWAP option must be taken into account - it
-          * simply requires the pixels to be reversed in each byte.
-          *
-          * The 'regular' case requires a mask for each of the first 6 passes,
-          * the 'display' case does a copy for the even passes in the range
-          * 0..6.  This has already been handled in the test above.
-          *
-          * The masks are arranged as four bytes with the first byte to use in
-          * the lowest bits (little-endian) regardless of the order (PACKSWAP or
-          * not) of the pixels in each byte.
-          *
-          * NOTE: the whole of this logic depends on the caller of this function
-          * only calling it on rows appropriate to the pass.  This function only
-          * understands the 'x' logic; the 'y' logic is handled by the caller.
-          *
-          * The following defines allow generation of compile time constant bit
-          * masks for each pixel depth and each possibility of swapped or not
-          * swapped bytes.  Pass 'p' is in the range 0..6; 'x', a pixel index,
-          * is in the range 0..7; and the result is 1 if the pixel is to be
-          * copied in the pass, 0 if not.  'S' is for the sparkle method, 'B'
-          * for the block method.
-          *
-          * With some compilers a compile time expression of the general form:
-          *
-          *    (shift >= 32) ? (a >> (shift-32)) : (b >> shift)
-          *
-          * Produces warnings with values of 'shift' in the range 33 to 63
-          * because the right hand side of the ?: expression is evaluated by
-          * the compiler even though it isn't used.  Microsoft Visual C (various
-          * versions) and the Intel C compiler are known to do this.  To avoid
-          * this the following macros are used in 1.5.6.  This is a temporary
-          * solution to avoid destabilizing the code during the release process.
-          */
-#        if PNG_USE_COMPILE_TIME_MASKS
-#           define PNG_LSR(x,s) ((x)>>((s) & 0x1f))
-#           define PNG_LSL(x,s) ((x)<<((s) & 0x1f))
-#        else
-#           define PNG_LSR(x,s) ((x)>>(s))
-#           define PNG_LSL(x,s) ((x)<<(s))
-#        endif
-#        define S_COPY(p,x) (((p)<4 ? PNG_LSR(0x80088822,(3-(p))*8+(7-(x))) :\
-           PNG_LSR(0xaa55ff00,(7-(p))*8+(7-(x)))) & 1)
-#        define B_COPY(p,x) (((p)<4 ? PNG_LSR(0xff0fff33,(3-(p))*8+(7-(x))) :\
-           PNG_LSR(0xff55ff00,(7-(p))*8+(7-(x)))) & 1)
-
-         /* Return a mask for pass 'p' pixel 'x' at depth 'd'.  The mask is
-          * little endian - the first pixel is at bit 0 - however the extra
-          * parameter 's' can be set to cause the mask position to be swapped
-          * within each byte, to match the PNG format.  This is done by XOR of
-          * the shift with 7, 6 or 4 for bit depths 1, 2 and 4.
-          */
-#        define PIXEL_MASK(p,x,d,s) \
-            (PNG_LSL(((PNG_LSL(1U,(d)))-1),(((x)*(d))^((s)?8-(d):0))))
-
-         /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask.
-          */
-#        define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
-#        define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
-
-         /* Combine 8 of these to get the full mask.  For the 1-bpp and 2-bpp
-          * cases the result needs replicating, for the 4-bpp case the above
-          * generates a full 32 bits.
-          */
-#        define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1)))
-
-#        define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\
-            S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\
-            S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d)
-
-#        define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\
-            B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\
-            B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d)
-
-#if PNG_USE_COMPILE_TIME_MASKS
-         /* Utility macros to construct all the masks for a depth/swap
-          * combination.  The 's' parameter says whether the format is PNG
-          * (big endian bytes) or not.  Only the three odd-numbered passes are
-          * required for the display/block algorithm.
-          */
-#        define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\
-            S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) }
-
-#        define B_MASKS(d,s) { B_MASK(1,d,s), B_MASK(3,d,s), B_MASK(5,d,s) }
-
-#        define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2))
-
-         /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and
-          * then pass:
-          */
-         static const png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] =
-         {
-            /* Little-endian byte masks for PACKSWAP */
-            { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) },
-            /* Normal (big-endian byte) masks - PNG format */
-            { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) }
-         };
-
-         /* display_mask has only three entries for the odd passes, so index by
-          * pass>>1.
-          */
-         static const png_uint_32 display_mask[2][3][3] =
-         {
-            /* Little-endian byte masks for PACKSWAP */
-            { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) },
-            /* Normal (big-endian byte) masks - PNG format */
-            { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) }
-         };
-
-#        define MASK(pass,depth,display,png)\
-            ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\
-               row_mask[png][DEPTH_INDEX(depth)][pass])
-
-#else /* !PNG_USE_COMPILE_TIME_MASKS */
-         /* This is the runtime alternative: it seems unlikely that this will
-          * ever be either smaller or faster than the compile time approach.
-          */
-#        define MASK(pass,depth,display,png)\
-            ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png))
-#endif /* !USE_COMPILE_TIME_MASKS */
-
-         /* Use the appropriate mask to copy the required bits.  In some cases
-          * the byte mask will be 0 or 0xff; optimize these cases.  row_width is
-          * the number of pixels, but the code copies bytes, so it is necessary
-          * to special case the end.
-          */
-         png_uint_32 pixels_per_byte = 8 / pixel_depth;
-         png_uint_32 mask;
-
-#        ifdef PNG_READ_PACKSWAP_SUPPORTED
-         if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-            mask = MASK(pass, pixel_depth, display, 0);
-
-         else
-#        endif
-         mask = MASK(pass, pixel_depth, display, 1);
-
-         for (;;)
-         {
-            png_uint_32 m;
-
-            /* It doesn't matter in the following if png_uint_32 has more than
-             * 32 bits because the high bits always match those in m<<24; it is,
-             * however, essential to use OR here, not +, because of this.
-             */
-            m = mask;
-            mask = (m >> 8) | (m << 24); /* rotate right to good compilers */
-            m &= 0xff;
-
-            if (m != 0) /* something to copy */
-            {
-               if (m != 0xff)
-                  *dp = (png_byte)((*dp & ~m) | (*sp & m));
-               else
-                  *dp = *sp;
-            }
-
-            /* NOTE: this may overwrite the last byte with garbage if the image
-             * is not an exact number of bytes wide; libpng has always done
-             * this.
-             */
-            if (row_width <= pixels_per_byte)
-               break; /* May need to restore part of the last byte */
-
-            row_width -= pixels_per_byte;
-            ++dp;
-            ++sp;
-         }
-      }
-
-      else /* pixel_depth >= 8 */
-      {
-         unsigned int bytes_to_copy, bytes_to_jump;
-
-         /* Validate the depth - it must be a multiple of 8 */
-         if (pixel_depth & 7)
-            png_error(png_ptr, "invalid user transform pixel depth");
-
-         pixel_depth >>= 3; /* now in bytes */
-         row_width *= pixel_depth;
-
-         /* Regardless of pass number the Adam 7 interlace always results in a
-          * fixed number of pixels to copy then to skip.  There may be a
-          * different number of pixels to skip at the start though.
-          */
-         {
-            unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth;
-
-            row_width -= offset;
-            dp += offset;
-            sp += offset;
-         }
-
-         /* Work out the bytes to copy. */
-         if (display != 0)
-         {
-            /* When doing the 'block' algorithm the pixel in the pass gets
-             * replicated to adjacent pixels.  This is why the even (0,2,4,6)
-             * passes are skipped above - the entire expanded row is copied.
-             */
-            bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth;
-
-            /* But don't allow this number to exceed the actual row width. */
-            if (bytes_to_copy > row_width)
-               bytes_to_copy = (unsigned int)/*SAFE*/row_width;
-         }
-
-         else /* normal row; Adam7 only ever gives us one pixel to copy. */
-            bytes_to_copy = pixel_depth;
-
-         /* In Adam7 there is a constant offset between where the pixels go. */
-         bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth;
-
-         /* And simply copy these bytes.  Some optimization is possible here,
-          * depending on the value of 'bytes_to_copy'.  Special case the low
-          * byte counts, which we know to be frequent.
-          *
-          * Notice that these cases all 'return' rather than 'break' - this
-          * avoids an unnecessary test on whether to restore the last byte
-          * below.
-          */
-         switch (bytes_to_copy)
-         {
-            case 1:
-               for (;;)
-               {
-                  *dp = *sp;
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  dp += bytes_to_jump;
-                  sp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-
-            case 2:
-               /* There is a possibility of a partial copy at the end here; this
-                * slows the code down somewhat.
-                */
-               do
-               {
-                  dp[0] = sp[0]; dp[1] = sp[1];
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-               while (row_width > 1);
-
-               /* And there can only be one byte left at this point: */
-               *dp = *sp;
-               return;
-
-            case 3:
-               /* This can only be the RGB case, so each copy is exactly one
-                * pixel and it is not necessary to check for a partial copy.
-                */
-               for (;;)
-               {
-                  dp[0] = sp[0]; dp[1] = sp[1]; dp[2] = sp[2];
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-
-            default:
-#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE
-               /* Check for double byte alignment and, if possible, use a
-                * 16-bit copy.  Don't attempt this for narrow images - ones that
-                * are less than an interlace panel wide.  Don't attempt it for
-                * wide bytes_to_copy either - use the memcpy there.
-                */
-               if (bytes_to_copy < 16 /*else use memcpy*/ &&
-                   png_isaligned(dp, png_uint_16) &&
-                   png_isaligned(sp, png_uint_16) &&
-                   bytes_to_copy % (sizeof (png_uint_16)) == 0 &&
-                   bytes_to_jump % (sizeof (png_uint_16)) == 0)
-               {
-                  /* Everything is aligned for png_uint_16 copies, but try for
-                   * png_uint_32 first.
-                   */
-                  if (png_isaligned(dp, png_uint_32) &&
-                      png_isaligned(sp, png_uint_32) &&
-                      bytes_to_copy % (sizeof (png_uint_32)) == 0 &&
-                      bytes_to_jump % (sizeof (png_uint_32)) == 0)
-                  {
-                     png_uint_32p dp32 = png_aligncast(png_uint_32p,dp);
-                     png_const_uint_32p sp32 = png_aligncastconst(
-                         png_const_uint_32p, sp);
-                     size_t skip = (bytes_to_jump-bytes_to_copy) /
-                         (sizeof (png_uint_32));
-
-                     do
-                     {
-                        size_t c = bytes_to_copy;
-                        do
-                        {
-                           *dp32++ = *sp32++;
-                           c -= (sizeof (png_uint_32));
-                        }
-                        while (c > 0);
-
-                        if (row_width <= bytes_to_jump)
-                           return;
-
-                        dp32 += skip;
-                        sp32 += skip;
-                        row_width -= bytes_to_jump;
-                     }
-                     while (bytes_to_copy <= row_width);
-
-                     /* Get to here when the row_width truncates the final copy.
-                      * There will be 1-3 bytes left to copy, so don't try the
-                      * 16-bit loop below.
-                      */
-                     dp = (png_bytep)dp32;
-                     sp = (png_const_bytep)sp32;
-                     do
-                        *dp++ = *sp++;
-                     while (--row_width > 0);
-                     return;
-                  }
-
-                  /* Else do it in 16-bit quantities, but only if the size is
-                   * not too large.
-                   */
-                  else
-                  {
-                     png_uint_16p dp16 = png_aligncast(png_uint_16p, dp);
-                     png_const_uint_16p sp16 = png_aligncastconst(
-                        png_const_uint_16p, sp);
-                     size_t skip = (bytes_to_jump-bytes_to_copy) /
-                        (sizeof (png_uint_16));
-
-                     do
-                     {
-                        size_t c = bytes_to_copy;
-                        do
-                        {
-                           *dp16++ = *sp16++;
-                           c -= (sizeof (png_uint_16));
-                        }
-                        while (c > 0);
-
-                        if (row_width <= bytes_to_jump)
-                           return;
-
-                        dp16 += skip;
-                        sp16 += skip;
-                        row_width -= bytes_to_jump;
-                     }
-                     while (bytes_to_copy <= row_width);
-
-                     /* End of row - 1 byte left, bytes_to_copy > row_width: */
-                     dp = (png_bytep)dp16;
-                     sp = (png_const_bytep)sp16;
-                     do
-                        *dp++ = *sp++;
-                     while (--row_width > 0);
-                     return;
-                  }
-               }
-#endif /* ALIGN_TYPE code */
-
-               /* The true default - use a memcpy: */
-               for (;;)
-               {
-                  memcpy(dp, sp, bytes_to_copy);
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-                  if (bytes_to_copy > row_width)
-                     bytes_to_copy = (unsigned int)/*SAFE*/row_width;
-               }
-         }
-
-         /* NOT REACHED*/
-      } /* pixel_depth >= 8 */
-
-      /* Here if pixel_depth < 8 to check 'end_ptr' below. */
-   }
-   else
-#endif /* READ_INTERLACING */
-
-   /* If here then the switch above wasn't used so just memcpy the whole row
-    * from the temporary row buffer (notice that this overwrites the end of the
-    * destination row if it is a partial byte.)
-    */
-   memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width));
-
-   /* Restore the overwritten bits from the last byte if necessary. */
-   if (end_ptr != NULL)
-      *end_ptr = (png_byte)((end_byte & end_mask) | (*end_ptr & ~end_mask));
-}
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-void /* PRIVATE */
-png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
-    png_uint_32 transformations /* Because these may affect the byte layout */)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   /* Offset to next interlace block */
-   static const unsigned int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   png_debug(1, "in png_do_read_interlace");
-   if (row != NULL && row_info != NULL)
-   {
-      png_uint_32 final_width;
-
-      final_width = row_info->width * png_pass_inc[pass];
-
-      switch (row_info->pixel_depth)
-      {
-         case 1:
-         {
-            png_bytep sp = row + (size_t)((row_info->width - 1) >> 3);
-            png_bytep dp = row + (size_t)((final_width - 1) >> 3);
-            unsigned int sshift, dshift;
-            unsigned int s_start, s_end;
-            int s_inc;
-            int jstop = (int)png_pass_inc[pass];
-            png_byte v;
-            png_uint_32 i;
-            int j;
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if ((transformations & PNG_PACKSWAP) != 0)
-            {
-                sshift = ((row_info->width + 7) & 0x07);
-                dshift = ((final_width + 7) & 0x07);
-                s_start = 7;
-                s_end = 0;
-                s_inc = -1;
-            }
-
-            else
-#endif
-            {
-                sshift = 7 - ((row_info->width + 7) & 0x07);
-                dshift = 7 - ((final_width + 7) & 0x07);
-                s_start = 0;
-                s_end = 7;
-                s_inc = 1;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               v = (png_byte)((*sp >> sshift) & 0x01);
-               for (j = 0; j < jstop; j++)
-               {
-                  unsigned int tmp = *dp & (0x7f7f >> (7 - dshift));
-                  tmp |= (unsigned int)(v << dshift);
-                  *dp = (png_byte)(tmp & 0xff);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift = (unsigned int)((int)dshift + s_inc);
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift = (unsigned int)((int)sshift + s_inc);
-            }
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2);
-            png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2);
-            unsigned int sshift, dshift;
-            unsigned int s_start, s_end;
-            int s_inc;
-            int jstop = (int)png_pass_inc[pass];
-            png_uint_32 i;
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if ((transformations & PNG_PACKSWAP) != 0)
-            {
-               sshift = (((row_info->width + 3) & 0x03) << 1);
-               dshift = (((final_width + 3) & 0x03) << 1);
-               s_start = 6;
-               s_end = 0;
-               s_inc = -2;
-            }
-
-            else
-#endif
-            {
-               sshift = ((3 - ((row_info->width + 3) & 0x03)) << 1);
-               dshift = ((3 - ((final_width + 3) & 0x03)) << 1);
-               s_start = 0;
-               s_end = 6;
-               s_inc = 2;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v;
-               int j;
-
-               v = (png_byte)((*sp >> sshift) & 0x03);
-               for (j = 0; j < jstop; j++)
-               {
-                  unsigned int tmp = *dp & (0x3f3f >> (6 - dshift));
-                  tmp |= (unsigned int)(v << dshift);
-                  *dp = (png_byte)(tmp & 0xff);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift = (unsigned int)((int)dshift + s_inc);
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift = (unsigned int)((int)sshift + s_inc);
-            }
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp = row + (size_t)((row_info->width - 1) >> 1);
-            png_bytep dp = row + (size_t)((final_width - 1) >> 1);
-            unsigned int sshift, dshift;
-            unsigned int s_start, s_end;
-            int s_inc;
-            png_uint_32 i;
-            int jstop = (int)png_pass_inc[pass];
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if ((transformations & PNG_PACKSWAP) != 0)
-            {
-               sshift = (((row_info->width + 1) & 0x01) << 2);
-               dshift = (((final_width + 1) & 0x01) << 2);
-               s_start = 4;
-               s_end = 0;
-               s_inc = -4;
-            }
-
-            else
-#endif
-            {
-               sshift = ((1 - ((row_info->width + 1) & 0x01)) << 2);
-               dshift = ((1 - ((final_width + 1) & 0x01)) << 2);
-               s_start = 0;
-               s_end = 4;
-               s_inc = 4;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v = (png_byte)((*sp >> sshift) & 0x0f);
-               int j;
-
-               for (j = 0; j < jstop; j++)
-               {
-                  unsigned int tmp = *dp & (0xf0f >> (4 - dshift));
-                  tmp |= (unsigned int)(v << dshift);
-                  *dp = (png_byte)(tmp & 0xff);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift = (unsigned int)((int)dshift + s_inc);
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift = (unsigned int)((int)sshift + s_inc);
-            }
-            break;
-         }
-
-         default:
-         {
-            size_t pixel_bytes = (row_info->pixel_depth >> 3);
-
-            png_bytep sp = row + (size_t)(row_info->width - 1)
-                * pixel_bytes;
-
-            png_bytep dp = row + (size_t)(final_width - 1) * pixel_bytes;
-
-            int jstop = (int)png_pass_inc[pass];
-            png_uint_32 i;
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v[8]; /* SAFE; pixel_depth does not exceed 64 */
-               int j;
-
-               memcpy(v, sp, pixel_bytes);
-
-               for (j = 0; j < jstop; j++)
-               {
-                  memcpy(dp, v, pixel_bytes);
-                  dp -= pixel_bytes;
-               }
-
-               sp -= pixel_bytes;
-            }
-            break;
-         }
-      }
-
-      row_info->width = final_width;
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, final_width);
-   }
-#ifndef PNG_READ_PACKSWAP_SUPPORTED
-   PNG_UNUSED(transformations)  /* Silence compiler warning */
-#endif
-}
-#endif /* READ_INTERLACING */
-
-static void
-png_read_filter_row_sub(png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row)
-{
-   size_t i;
-   size_t istop = row_info->rowbytes;
-   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
-   png_bytep rp = row + bpp;
-
-   PNG_UNUSED(prev_row)
-
-   for (i = bpp; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_up(png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row)
-{
-   size_t i;
-   size_t istop = row_info->rowbytes;
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-
-   for (i = 0; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_avg(png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row)
-{
-   size_t i;
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
-   size_t istop = row_info->rowbytes - bpp;
-
-   for (i = 0; i < bpp; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) +
-         ((int)(*pp++) / 2 )) & 0xff);
-
-      rp++;
-   }
-
-   for (i = 0; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) +
-         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
-
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_paeth_1byte_pixel(png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row)
-{
-   png_bytep rp_end = row + row_info->rowbytes;
-   int a, c;
-
-   /* First pixel/byte */
-   c = *prev_row++;
-   a = *row + c;
-   *row++ = (png_byte)a;
-
-   /* Remainder */
-   while (row < rp_end)
-   {
-      int b, pa, pb, pc, p;
-
-      a &= 0xff; /* From previous iteration or start */
-      b = *prev_row++;
-
-      p = b - c;
-      pc = a - c;
-
-#ifdef PNG_USE_ABS
-      pa = abs(p);
-      pb = abs(pc);
-      pc = abs(p + pc);
-#else
-      pa = p < 0 ? -p : p;
-      pb = pc < 0 ? -pc : pc;
-      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-
-      /* Find the best predictor, the least of pa, pb, pc favoring the earlier
-       * ones in the case of a tie.
-       */
-      if (pb < pa)
-      {
-         pa = pb; a = b;
-      }
-      if (pc < pa) a = c;
-
-      /* Calculate the current pixel in a, and move the previous row pixel to c
-       * for the next time round the loop
-       */
-      c = b;
-      a += *row;
-      *row++ = (png_byte)a;
-   }
-}
-
-static void
-png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row)
-{
-   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
-   png_bytep rp_end = row + bpp;
-
-   /* Process the first pixel in the row completely (this is the same as 'up'
-    * because there is only one candidate predictor for the first row).
-    */
-   while (row < rp_end)
-   {
-      int a = *row + *prev_row++;
-      *row++ = (png_byte)a;
-   }
-
-   /* Remainder */
-   rp_end = rp_end + (row_info->rowbytes - bpp);
-
-   while (row < rp_end)
-   {
-      int a, b, c, pa, pb, pc, p;
-
-      c = *(prev_row - bpp);
-      a = *(row - bpp);
-      b = *prev_row++;
-
-      p = b - c;
-      pc = a - c;
-
-#ifdef PNG_USE_ABS
-      pa = abs(p);
-      pb = abs(pc);
-      pc = abs(p + pc);
-#else
-      pa = p < 0 ? -p : p;
-      pb = pc < 0 ? -pc : pc;
-      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-
-      if (pb < pa)
-      {
-         pa = pb; a = b;
-      }
-      if (pc < pa) a = c;
-
-      a += *row;
-      *row++ = (png_byte)a;
-   }
-}
-
-static void
-png_init_filter_functions(png_structrp pp)
-   /* This function is called once for every PNG image (except for PNG images
-    * that only use PNG_FILTER_VALUE_NONE for all rows) to set the
-    * implementations required to reverse the filtering of PNG rows.  Reversing
-    * the filter is the first transformation performed on the row data.  It is
-    * performed in place, therefore an implementation can be selected based on
-    * the image pixel format.  If the implementation depends on image width then
-    * take care to ensure that it works correctly if the image is interlaced -
-    * interlacing causes the actual row width to vary.
-    */
-{
-   unsigned int bpp = (pp->pixel_depth + 7) >> 3;
-
-   pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub;
-   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up;
-   pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg;
-   if (bpp == 1)
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth_1byte_pixel;
-   else
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth_multibyte_pixel;
-
-#ifdef PNG_FILTER_OPTIMIZATIONS
-   /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
-    * call to install hardware optimizations for the above functions; simply
-    * replace whatever elements of the pp->read_filter[] array with a hardware
-    * specific (or, for that matter, generic) optimization.
-    *
-    * To see an example of this examine what configure.ac does when
-    * --enable-arm-neon is specified on the command line.
-    */
-   PNG_FILTER_OPTIMIZATIONS(pp, bpp);
-#endif
-}
-
-void /* PRIVATE */
-png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row,
-    png_const_bytep prev_row, int filter)
-{
-   /* OPTIMIZATION: DO NOT MODIFY THIS FUNCTION, instead #define
-    * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic
-    * implementations.  See png_init_filter_functions above.
-    */
-   if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST)
-   {
-      if (pp->read_filter[0] == NULL)
-         png_init_filter_functions(pp);
-
-      pp->read_filter[filter-1](row_info, row, prev_row);
-   }
-}
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-void /* PRIVATE */
-png_read_IDAT_data(png_structrp png_ptr, png_bytep output,
-    png_alloc_size_t avail_out)
-{
-   /* Loop reading IDATs and decompressing the result into output[avail_out] */
-   png_ptr->zstream.next_out = output;
-   png_ptr->zstream.avail_out = 0; /* safety: set below */
-
-   if (output == NULL)
-      avail_out = 0;
-
-   do
-   {
-      int ret;
-      png_byte tmpbuf[PNG_INFLATE_BUF_SIZE];
-
-      if (png_ptr->zstream.avail_in == 0)
-      {
-         uInt avail_in;
-         png_bytep buffer;
-
-         while (png_ptr->idat_size == 0)
-         {
-            png_crc_finish(png_ptr, 0);
-
-            png_ptr->idat_size = png_read_chunk_header(png_ptr);
-            /* This is an error even in the 'check' case because the code just
-             * consumed a non-IDAT header.
-             */
-            if (png_ptr->chunk_name != png_IDAT)
-               png_error(png_ptr, "Not enough image data");
-         }
-
-         avail_in = png_ptr->IDAT_read_size;
-
-         if (avail_in > png_ptr->idat_size)
-            avail_in = (uInt)png_ptr->idat_size;
-
-         /* A PNG with a gradually increasing IDAT size will defeat this attempt
-          * to minimize memory usage by causing lots of re-allocs, but
-          * realistically doing IDAT_read_size re-allocs is not likely to be a
-          * big problem.
-          */
-         buffer = png_read_buffer(png_ptr, avail_in, 0/*error*/);
-
-         png_crc_read(png_ptr, buffer, avail_in);
-         png_ptr->idat_size -= avail_in;
-
-         png_ptr->zstream.next_in = buffer;
-         png_ptr->zstream.avail_in = avail_in;
-      }
-
-      /* And set up the output side. */
-      if (output != NULL) /* standard read */
-      {
-         uInt out = ZLIB_IO_MAX;
-
-         if (out > avail_out)
-            out = (uInt)avail_out;
-
-         avail_out -= out;
-         png_ptr->zstream.avail_out = out;
-      }
-
-      else /* after last row, checking for end */
-      {
-         png_ptr->zstream.next_out = tmpbuf;
-         png_ptr->zstream.avail_out = (sizeof tmpbuf);
-      }
-
-      /* Use NO_FLUSH; this gives zlib the maximum opportunity to optimize the
-       * process.  If the LZ stream is truncated the sequential reader will
-       * terminally damage the stream, above, by reading the chunk header of the
-       * following chunk (it then exits with png_error).
-       *
-       * TODO: deal more elegantly with truncated IDAT lists.
-       */
-      ret = PNG_INFLATE(png_ptr, Z_NO_FLUSH);
-
-      /* Take the unconsumed output back. */
-      if (output != NULL)
-         avail_out += png_ptr->zstream.avail_out;
-
-      else /* avail_out counts the extra bytes */
-         avail_out += (sizeof tmpbuf) - png_ptr->zstream.avail_out;
-
-      png_ptr->zstream.avail_out = 0;
-
-      if (ret == Z_STREAM_END)
-      {
-         /* Do this for safety; we won't read any more into this row. */
-         png_ptr->zstream.next_out = NULL;
-
-         png_ptr->mode |= PNG_AFTER_IDAT;
-         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
-
-         if (png_ptr->zstream.avail_in > 0 || png_ptr->idat_size > 0)
-            png_chunk_benign_error(png_ptr, "Extra compressed data");
-         break;
-      }
-
-      if (ret != Z_OK)
-      {
-         png_zstream_error(png_ptr, ret);
-
-         if (output != NULL)
-            png_chunk_error(png_ptr, png_ptr->zstream.msg);
-
-         else /* checking */
-         {
-            png_chunk_benign_error(png_ptr, png_ptr->zstream.msg);
-            return;
-         }
-      }
-   } while (avail_out > 0);
-
-   if (avail_out > 0)
-   {
-      /* The stream ended before the image; this is the same as too few IDATs so
-       * should be handled the same way.
-       */
-      if (output != NULL)
-         png_error(png_ptr, "Not enough image data");
-
-      else /* the deflate stream contained extra data */
-         png_chunk_benign_error(png_ptr, "Too much image data");
-   }
-}
-
-void /* PRIVATE */
-png_read_finish_IDAT(png_structrp png_ptr)
-{
-   /* We don't need any more data and the stream should have ended, however the
-    * LZ end code may actually not have been processed.  In this case we must
-    * read it otherwise stray unread IDAT data or, more likely, an IDAT chunk
-    * may still remain to be consumed.
-    */
-   if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
-   {
-      /* The NULL causes png_read_IDAT_data to swallow any remaining bytes in
-       * the compressed stream, but the stream may be damaged too, so even after
-       * this call we may need to terminate the zstream ownership.
-       */
-      png_read_IDAT_data(png_ptr, NULL, 0);
-      png_ptr->zstream.next_out = NULL; /* safety */
-
-      /* Now clear everything out for safety; the following may not have been
-       * done.
-       */
-      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
-      {
-         png_ptr->mode |= PNG_AFTER_IDAT;
-         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
-      }
-   }
-
-   /* If the zstream has not been released do it now *and* terminate the reading
-    * of the final IDAT chunk.
-    */
-   if (png_ptr->zowner == png_IDAT)
-   {
-      /* Always do this; the pointers otherwise point into the read buffer. */
-      png_ptr->zstream.next_in = NULL;
-      png_ptr->zstream.avail_in = 0;
-
-      /* Now we no longer own the zstream. */
-      png_ptr->zowner = 0;
-
-      /* The slightly weird semantics of the sequential IDAT reading is that we
-       * are always in or at the end of an IDAT chunk, so we always need to do a
-       * crc_finish here.  If idat_size is non-zero we also need to read the
-       * spurious bytes at the end of the chunk now.
-       */
-      (void)png_crc_finish(png_ptr, png_ptr->idat_size);
-   }
-}
-
-void /* PRIVATE */
-png_read_finish_row(png_structrp png_ptr)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-
-   png_debug(1, "in png_read_finish_row");
-   png_ptr->row_number++;
-   if (png_ptr->row_number < png_ptr->num_rows)
-      return;
-
-   if (png_ptr->interlaced != 0)
-   {
-      png_ptr->row_number = 0;
-
-      /* TO DO: don't do this if prev_row isn't needed (requires
-       * read-ahead of the next row's filter byte.
-       */
-      memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
-
-      do
-      {
-         png_ptr->pass++;
-
-         if (png_ptr->pass >= 7)
-            break;
-
-         png_ptr->iwidth = (png_ptr->width +
-            png_pass_inc[png_ptr->pass] - 1 -
-            png_pass_start[png_ptr->pass]) /
-            png_pass_inc[png_ptr->pass];
-
-         if ((png_ptr->transformations & PNG_INTERLACE) == 0)
-         {
-            png_ptr->num_rows = (png_ptr->height +
-                png_pass_yinc[png_ptr->pass] - 1 -
-                png_pass_ystart[png_ptr->pass]) /
-                png_pass_yinc[png_ptr->pass];
-         }
-
-         else  /* if (png_ptr->transformations & PNG_INTERLACE) */
-            break; /* libpng deinterlacing sees every row */
-
-      } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0);
-
-      if (png_ptr->pass < 7)
-         return;
-   }
-
-   /* Here after at the end of the last row of the last pass. */
-   png_read_finish_IDAT(png_ptr);
-}
-#endif /* SEQUENTIAL_READ */
-
-void /* PRIVATE */
-png_read_start_row(png_structrp png_ptr)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-
-   unsigned int max_pixel_depth;
-   size_t row_bytes;
-
-   png_debug(1, "in png_read_start_row");
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   png_init_read_transformations(png_ptr);
-#endif
-   if (png_ptr->interlaced != 0)
-   {
-      if ((png_ptr->transformations & PNG_INTERLACE) == 0)
-         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
-             png_pass_ystart[0]) / png_pass_yinc[0];
-
-      else
-         png_ptr->num_rows = png_ptr->height;
-
-      png_ptr->iwidth = (png_ptr->width +
-          png_pass_inc[png_ptr->pass] - 1 -
-          png_pass_start[png_ptr->pass]) /
-          png_pass_inc[png_ptr->pass];
-   }
-
-   else
-   {
-      png_ptr->num_rows = png_ptr->height;
-      png_ptr->iwidth = png_ptr->width;
-   }
-
-   max_pixel_depth = (unsigned int)png_ptr->pixel_depth;
-
-   /* WARNING: * png_read_transform_info (pngrtran.c) performs a simpler set of
-    * calculations to calculate the final pixel depth, then
-    * png_do_read_transforms actually does the transforms.  This means that the
-    * code which effectively calculates this value is actually repeated in three
-    * separate places.  They must all match.  Innocent changes to the order of
-    * transformations can and will break libpng in a way that causes memory
-    * overwrites.
-    *
-    * TODO: fix this.
-    */
-#ifdef PNG_READ_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) != 0 && png_ptr->bit_depth < 8)
-      max_pixel_depth = 8;
-#endif
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   if ((png_ptr->transformations & PNG_EXPAND) != 0)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (png_ptr->num_trans != 0)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 24;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         if (max_pixel_depth < 8)
-            max_pixel_depth = 8;
-
-         if (png_ptr->num_trans != 0)
-            max_pixel_depth *= 2;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
-      {
-         if (png_ptr->num_trans != 0)
-         {
-            max_pixel_depth *= 4;
-            max_pixel_depth /= 3;
-         }
-      }
-   }
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   if ((png_ptr->transformations & PNG_EXPAND_16) != 0)
-   {
-#  ifdef PNG_READ_EXPAND_SUPPORTED
-      /* In fact it is an error if it isn't supported, but checking is
-       * the safe way.
-       */
-      if ((png_ptr->transformations & PNG_EXPAND) != 0)
-      {
-         if (png_ptr->bit_depth < 16)
-            max_pixel_depth *= 2;
-      }
-      else
-#  endif
-      png_ptr->transformations &= ~PNG_EXPAND_16;
-   }
-#endif
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-   if ((png_ptr->transformations & (PNG_FILLER)) != 0)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         if (max_pixel_depth <= 8)
-            max_pixel_depth = 16;
-
-         else
-            max_pixel_depth = 32;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB ||
-         png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (max_pixel_depth <= 32)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 64;
-      }
-   }
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
-   {
-      if (
-#ifdef PNG_READ_EXPAND_SUPPORTED
-          (png_ptr->num_trans != 0 &&
-          (png_ptr->transformations & PNG_EXPAND) != 0) ||
-#endif
-#ifdef PNG_READ_FILLER_SUPPORTED
-          (png_ptr->transformations & (PNG_FILLER)) != 0 ||
-#endif
-          png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      {
-         if (max_pixel_depth <= 16)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 64;
-      }
-
-      else
-      {
-         if (max_pixel_depth <= 8)
-         {
-            if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-               max_pixel_depth = 32;
-
-            else
-               max_pixel_depth = 24;
-         }
-
-         else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            max_pixel_depth = 64;
-
-         else
-            max_pixel_depth = 48;
-      }
-   }
-#endif
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \
-defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
-   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
-   {
-      unsigned int user_pixel_depth = png_ptr->user_transform_depth *
-         png_ptr->user_transform_channels;
-
-      if (user_pixel_depth > max_pixel_depth)
-         max_pixel_depth = user_pixel_depth;
-   }
-#endif
-
-   /* This value is stored in png_struct and double checked in the row read
-    * code.
-    */
-   png_ptr->maximum_pixel_depth = (png_byte)max_pixel_depth;
-   png_ptr->transformed_pixel_depth = 0; /* calculated on demand */
-
-   /* Align the width on the next larger 8 pixels.  Mainly used
-    * for interlacing
-    */
-   row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7));
-   /* Calculate the maximum bytes needed, adding a byte and a pixel
-    * for safety's sake
-    */
-   row_bytes = PNG_ROWBYTES(max_pixel_depth, row_bytes) +
-       1 + ((max_pixel_depth + 7) >> 3U);
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (row_bytes > (png_uint_32)65536L)
-      png_error(png_ptr, "This image requires a row greater than 64KB");
-#endif
-
-   if (row_bytes + 48 > png_ptr->old_big_row_buf_size)
-   {
-      png_free(png_ptr, png_ptr->big_row_buf);
-      png_free(png_ptr, png_ptr->big_prev_row);
-
-      if (png_ptr->interlaced != 0)
-         png_ptr->big_row_buf = (png_bytep)png_calloc(png_ptr,
-             row_bytes + 48);
-
-      else
-         png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
-
-      png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
-
-#ifdef PNG_ALIGNED_MEMORY_SUPPORTED
-      /* Use 16-byte aligned memory for row_buf with at least 16 bytes
-       * of padding before and after row_buf; treat prev_row similarly.
-       * NOTE: the alignment is to the start of the pixels, one beyond the start
-       * of the buffer, because of the filter byte.  Prior to libpng 1.5.6 this
-       * was incorrect; the filter byte was aligned, which had the exact
-       * opposite effect of that intended.
-       */
-      {
-         png_bytep temp = png_ptr->big_row_buf + 32;
-         size_t extra = (size_t)temp & 0x0f;
-         png_ptr->row_buf = temp - extra - 1/*filter byte*/;
-
-         temp = png_ptr->big_prev_row + 32;
-         extra = (size_t)temp & 0x0f;
-         png_ptr->prev_row = temp - extra - 1/*filter byte*/;
-      }
-#else
-      /* Use 31 bytes of padding before and 17 bytes after row_buf. */
-      png_ptr->row_buf = png_ptr->big_row_buf + 31;
-      png_ptr->prev_row = png_ptr->big_prev_row + 31;
-#endif
-      png_ptr->old_big_row_buf_size = row_bytes + 48;
-   }
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (png_ptr->rowbytes > 65535)
-      png_error(png_ptr, "This image requires a row greater than 64KB");
-
-#endif
-   if (png_ptr->rowbytes > (PNG_SIZE_MAX - 1))
-      png_error(png_ptr, "Row has too many bytes to allocate in memory");
-
-   memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
-
-   png_debug1(3, "width = %u,", png_ptr->width);
-   png_debug1(3, "height = %u,", png_ptr->height);
-   png_debug1(3, "iwidth = %u,", png_ptr->iwidth);
-   png_debug1(3, "num_rows = %u,", png_ptr->num_rows);
-   png_debug1(3, "rowbytes = %lu,", (unsigned long)png_ptr->rowbytes);
-   png_debug1(3, "irowbytes = %lu",
-       (unsigned long)PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->iwidth) + 1);
-
-   /* The sequential reader needs a buffer for IDAT, but the progressive reader
-    * does not, so free the read buffer now regardless; the sequential reader
-    * reallocates it on demand.
-    */
-   if (png_ptr->read_buffer != NULL)
-   {
-      png_bytep buffer = png_ptr->read_buffer;
-
-      png_ptr->read_buffer_size = 0;
-      png_ptr->read_buffer = NULL;
-      png_free(png_ptr, buffer);
-   }
-
-   /* Finally claim the zstream for the inflate of the IDAT data, use the bits
-    * value from the stream (note that this will result in a fatal error if the
-    * IDAT stream has a bogus deflate header window_bits value, but this should
-    * not be happening any longer!)
-    */
-   if (png_inflate_claim(png_ptr, png_IDAT) != Z_OK)
-      png_error(png_ptr, png_ptr->zstream.msg);
-
-   png_ptr->flags |= PNG_FLAG_ROW_INIT;
-}
-#endif /* READ */
diff --git a/dep/libpng/src/pngset.c b/dep/libpng/src/pngset.c
deleted file mode 100644
index eb1c8c7a3..000000000
--- a/dep/libpng/src/pngset.c
+++ /dev/null
@@ -1,1803 +0,0 @@
-
-/* pngset.c - storage of image information into info struct
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * The functions here are used during reads to store data from the file
- * into the info struct, and during writes to store application data
- * into the info struct for writing into the file.  This abstracts the
- * info struct and allows us to change the structure in the future.
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-#ifdef PNG_bKGD_SUPPORTED
-void PNGAPI
-png_set_bKGD(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_color_16p background)
-{
-   png_debug1(1, "in %s storage function", "bKGD");
-
-   if (png_ptr == NULL || info_ptr == NULL || background == NULL)
-      return;
-
-   info_ptr->background = *background;
-   info_ptr->valid |= PNG_INFO_bKGD;
-}
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-void PNGFAPI
-png_set_cHRM_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
-    png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
-    png_fixed_point blue_x, png_fixed_point blue_y)
-{
-   png_xy xy;
-
-   png_debug1(1, "in %s storage function", "cHRM fixed");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   xy.redx = red_x;
-   xy.redy = red_y;
-   xy.greenx = green_x;
-   xy.greeny = green_y;
-   xy.bluex = blue_x;
-   xy.bluey = blue_y;
-   xy.whitex = white_x;
-   xy.whitey = white_y;
-
-   if (png_colorspace_set_chromaticities(png_ptr, &info_ptr->colorspace, &xy,
-       2/* override with app values*/) != 0)
-      info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
-
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-
-void PNGFAPI
-png_set_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_fixed_point int_red_X, png_fixed_point int_red_Y,
-    png_fixed_point int_red_Z, png_fixed_point int_green_X,
-    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
-    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
-    png_fixed_point int_blue_Z)
-{
-   png_XYZ XYZ;
-
-   png_debug1(1, "in %s storage function", "cHRM XYZ fixed");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   XYZ.red_X = int_red_X;
-   XYZ.red_Y = int_red_Y;
-   XYZ.red_Z = int_red_Z;
-   XYZ.green_X = int_green_X;
-   XYZ.green_Y = int_green_Y;
-   XYZ.green_Z = int_green_Z;
-   XYZ.blue_X = int_blue_X;
-   XYZ.blue_Y = int_blue_Y;
-   XYZ.blue_Z = int_blue_Z;
-
-   if (png_colorspace_set_endpoints(png_ptr, &info_ptr->colorspace,
-       &XYZ, 2) != 0)
-      info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
-
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_cHRM(png_const_structrp png_ptr, png_inforp info_ptr,
-    double white_x, double white_y, double red_x, double red_y,
-    double green_x, double green_y, double blue_x, double blue_y)
-{
-   png_set_cHRM_fixed(png_ptr, info_ptr,
-       png_fixed(png_ptr, white_x, "cHRM White X"),
-       png_fixed(png_ptr, white_y, "cHRM White Y"),
-       png_fixed(png_ptr, red_x, "cHRM Red X"),
-       png_fixed(png_ptr, red_y, "cHRM Red Y"),
-       png_fixed(png_ptr, green_x, "cHRM Green X"),
-       png_fixed(png_ptr, green_y, "cHRM Green Y"),
-       png_fixed(png_ptr, blue_x, "cHRM Blue X"),
-       png_fixed(png_ptr, blue_y, "cHRM Blue Y"));
-}
-
-void PNGAPI
-png_set_cHRM_XYZ(png_const_structrp png_ptr, png_inforp info_ptr, double red_X,
-    double red_Y, double red_Z, double green_X, double green_Y, double green_Z,
-    double blue_X, double blue_Y, double blue_Z)
-{
-   png_set_cHRM_XYZ_fixed(png_ptr, info_ptr,
-       png_fixed(png_ptr, red_X, "cHRM Red X"),
-       png_fixed(png_ptr, red_Y, "cHRM Red Y"),
-       png_fixed(png_ptr, red_Z, "cHRM Red Z"),
-       png_fixed(png_ptr, green_X, "cHRM Green X"),
-       png_fixed(png_ptr, green_Y, "cHRM Green Y"),
-       png_fixed(png_ptr, green_Z, "cHRM Green Z"),
-       png_fixed(png_ptr, blue_X, "cHRM Blue X"),
-       png_fixed(png_ptr, blue_Y, "cHRM Blue Y"),
-       png_fixed(png_ptr, blue_Z, "cHRM Blue Z"));
-}
-#  endif /* FLOATING_POINT */
-
-#endif /* cHRM */
-
-#ifdef PNG_eXIf_SUPPORTED
-void PNGAPI
-png_set_eXIf(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_bytep exif)
-{
-  png_warning(png_ptr, "png_set_eXIf does not work; use png_set_eXIf_1");
-  PNG_UNUSED(info_ptr)
-  PNG_UNUSED(exif)
-}
-
-void PNGAPI
-png_set_eXIf_1(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_uint_32 num_exif, png_bytep exif)
-{
-   png_bytep new_exif;
-
-   png_debug1(1, "in %s storage function", "eXIf");
-
-   if (png_ptr == NULL || info_ptr == NULL ||
-       (png_ptr->mode & PNG_WROTE_eXIf) != 0)
-      return;
-
-   new_exif = png_voidcast(png_bytep, png_malloc_warn(png_ptr, num_exif));
-
-   if (new_exif == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for eXIf chunk data");
-      return;
-   }
-
-   memcpy(new_exif, exif, (size_t)num_exif);
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_EXIF, 0);
-
-   info_ptr->num_exif = num_exif;
-   info_ptr->exif = new_exif;
-   info_ptr->free_me |= PNG_FREE_EXIF;
-   info_ptr->valid |= PNG_INFO_eXIf;
-}
-#endif /* eXIf */
-
-#ifdef PNG_gAMA_SUPPORTED
-void PNGFAPI
-png_set_gAMA_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_fixed_point file_gamma)
-{
-   png_debug1(1, "in %s storage function", "gAMA");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_colorspace_set_gamma(png_ptr, &info_ptr->colorspace, file_gamma);
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_gAMA(png_const_structrp png_ptr, png_inforp info_ptr, double file_gamma)
-{
-   png_set_gAMA_fixed(png_ptr, info_ptr, png_fixed(png_ptr, file_gamma,
-       "png_set_gAMA"));
-}
-#  endif
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-void PNGAPI
-png_set_hIST(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_uint_16p hist)
-{
-   int i;
-
-   png_debug1(1, "in %s storage function", "hIST");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (info_ptr->num_palette == 0 || info_ptr->num_palette
-       > PNG_MAX_PALETTE_LENGTH)
-   {
-      png_warning(png_ptr,
-          "Invalid palette size, hIST allocation skipped");
-
-      return;
-   }
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0);
-
-   /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in
-    * version 1.2.1
-    */
-   info_ptr->hist = png_voidcast(png_uint_16p, png_malloc_warn(png_ptr,
-       PNG_MAX_PALETTE_LENGTH * (sizeof (png_uint_16))));
-
-   if (info_ptr->hist == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for hIST chunk data");
-      return;
-   }
-
-   for (i = 0; i < info_ptr->num_palette; i++)
-      info_ptr->hist[i] = hist[i];
-
-   info_ptr->free_me |= PNG_FREE_HIST;
-   info_ptr->valid |= PNG_INFO_hIST;
-}
-#endif
-
-void PNGAPI
-png_set_IHDR(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_uint_32 width, png_uint_32 height, int bit_depth,
-    int color_type, int interlace_type, int compression_type,
-    int filter_type)
-{
-   png_debug1(1, "in %s storage function", "IHDR");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->width = width;
-   info_ptr->height = height;
-   info_ptr->bit_depth = (png_byte)bit_depth;
-   info_ptr->color_type = (png_byte)color_type;
-   info_ptr->compression_type = (png_byte)compression_type;
-   info_ptr->filter_type = (png_byte)filter_type;
-   info_ptr->interlace_type = (png_byte)interlace_type;
-
-   png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height,
-       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
-       info_ptr->compression_type, info_ptr->filter_type);
-
-   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      info_ptr->channels = 1;
-
-   else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      info_ptr->channels = 3;
-
-   else
-      info_ptr->channels = 1;
-
-   if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      info_ptr->channels++;
-
-   info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
-
-   info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, width);
-}
-
-#ifdef PNG_oFFs_SUPPORTED
-void PNGAPI
-png_set_oFFs(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_int_32 offset_x, png_int_32 offset_y, int unit_type)
-{
-   png_debug1(1, "in %s storage function", "oFFs");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->x_offset = offset_x;
-   info_ptr->y_offset = offset_y;
-   info_ptr->offset_unit_type = (png_byte)unit_type;
-   info_ptr->valid |= PNG_INFO_oFFs;
-}
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-void PNGAPI
-png_set_pCAL(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type,
-    int nparams, png_const_charp units, png_charpp params)
-{
-   size_t length;
-   int i;
-
-   png_debug1(1, "in %s storage function", "pCAL");
-
-   if (png_ptr == NULL || info_ptr == NULL || purpose == NULL || units == NULL
-       || (nparams > 0 && params == NULL))
-      return;
-
-   length = strlen(purpose) + 1;
-   png_debug1(3, "allocating purpose for info (%lu bytes)",
-       (unsigned long)length);
-
-   /* TODO: validate format of calibration name and unit name */
-
-   /* Check that the type matches the specification. */
-   if (type < 0 || type > 3)
-   {
-      png_chunk_report(png_ptr, "Invalid pCAL equation type",
-            PNG_CHUNK_WRITE_ERROR);
-      return;
-   }
-
-   if (nparams < 0 || nparams > 255)
-   {
-      png_chunk_report(png_ptr, "Invalid pCAL parameter count",
-            PNG_CHUNK_WRITE_ERROR);
-      return;
-   }
-
-   /* Validate params[nparams] */
-   for (i=0; i<nparams; ++i)
-   {
-      if (params[i] == NULL ||
-          !png_check_fp_string(params[i], strlen(params[i])))
-      {
-         png_chunk_report(png_ptr, "Invalid format for pCAL parameter",
-               PNG_CHUNK_WRITE_ERROR);
-         return;
-      }
-   }
-
-   info_ptr->pcal_purpose = png_voidcast(png_charp,
-       png_malloc_warn(png_ptr, length));
-
-   if (info_ptr->pcal_purpose == NULL)
-   {
-      png_chunk_report(png_ptr, "Insufficient memory for pCAL purpose",
-            PNG_CHUNK_WRITE_ERROR);
-      return;
-   }
-
-   memcpy(info_ptr->pcal_purpose, purpose, length);
-
-   info_ptr->free_me |= PNG_FREE_PCAL;
-
-   png_debug(3, "storing X0, X1, type, and nparams in info");
-   info_ptr->pcal_X0 = X0;
-   info_ptr->pcal_X1 = X1;
-   info_ptr->pcal_type = (png_byte)type;
-   info_ptr->pcal_nparams = (png_byte)nparams;
-
-   length = strlen(units) + 1;
-   png_debug1(3, "allocating units for info (%lu bytes)",
-       (unsigned long)length);
-
-   info_ptr->pcal_units = png_voidcast(png_charp,
-       png_malloc_warn(png_ptr, length));
-
-   if (info_ptr->pcal_units == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for pCAL units");
-      return;
-   }
-
-   memcpy(info_ptr->pcal_units, units, length);
-
-   info_ptr->pcal_params = png_voidcast(png_charpp, png_malloc_warn(png_ptr,
-       (size_t)(((unsigned int)nparams + 1) * (sizeof (png_charp)))));
-
-   if (info_ptr->pcal_params == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for pCAL params");
-      return;
-   }
-
-   memset(info_ptr->pcal_params, 0, ((unsigned int)nparams + 1) *
-       (sizeof (png_charp)));
-
-   for (i = 0; i < nparams; i++)
-   {
-      length = strlen(params[i]) + 1;
-      png_debug2(3, "allocating parameter %d for info (%lu bytes)", i,
-          (unsigned long)length);
-
-      info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length);
-
-      if (info_ptr->pcal_params[i] == NULL)
-      {
-         png_warning(png_ptr, "Insufficient memory for pCAL parameter");
-         return;
-      }
-
-      memcpy(info_ptr->pcal_params[i], params[i], length);
-   }
-
-   info_ptr->valid |= PNG_INFO_pCAL;
-}
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-void PNGAPI
-png_set_sCAL_s(png_const_structrp png_ptr, png_inforp info_ptr,
-    int unit, png_const_charp swidth, png_const_charp sheight)
-{
-   size_t lengthw = 0, lengthh = 0;
-
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Double check the unit (should never get here with an invalid
-    * unit unless this is an API call.)
-    */
-   if (unit != 1 && unit != 2)
-      png_error(png_ptr, "Invalid sCAL unit");
-
-   if (swidth == NULL || (lengthw = strlen(swidth)) == 0 ||
-       swidth[0] == 45 /* '-' */ || !png_check_fp_string(swidth, lengthw))
-      png_error(png_ptr, "Invalid sCAL width");
-
-   if (sheight == NULL || (lengthh = strlen(sheight)) == 0 ||
-       sheight[0] == 45 /* '-' */ || !png_check_fp_string(sheight, lengthh))
-      png_error(png_ptr, "Invalid sCAL height");
-
-   info_ptr->scal_unit = (png_byte)unit;
-
-   ++lengthw;
-
-   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthw);
-
-   info_ptr->scal_s_width = png_voidcast(png_charp,
-       png_malloc_warn(png_ptr, lengthw));
-
-   if (info_ptr->scal_s_width == NULL)
-   {
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
-
-      return;
-   }
-
-   memcpy(info_ptr->scal_s_width, swidth, lengthw);
-
-   ++lengthh;
-
-   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthh);
-
-   info_ptr->scal_s_height = png_voidcast(png_charp,
-       png_malloc_warn(png_ptr, lengthh));
-
-   if (info_ptr->scal_s_height == NULL)
-   {
-      png_free(png_ptr, info_ptr->scal_s_width);
-      info_ptr->scal_s_width = NULL;
-
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
-      return;
-   }
-
-   memcpy(info_ptr->scal_s_height, sheight, lengthh);
-
-   info_ptr->free_me |= PNG_FREE_SCAL;
-   info_ptr->valid |= PNG_INFO_sCAL;
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_sCAL(png_const_structrp png_ptr, png_inforp info_ptr, int unit,
-    double width, double height)
-{
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   /* Check the arguments. */
-   if (width <= 0)
-      png_warning(png_ptr, "Invalid sCAL width ignored");
-
-   else if (height <= 0)
-      png_warning(png_ptr, "Invalid sCAL height ignored");
-
-   else
-   {
-      /* Convert 'width' and 'height' to ASCII. */
-      char swidth[PNG_sCAL_MAX_DIGITS+1];
-      char sheight[PNG_sCAL_MAX_DIGITS+1];
-
-      png_ascii_from_fp(png_ptr, swidth, (sizeof swidth), width,
-          PNG_sCAL_PRECISION);
-      png_ascii_from_fp(png_ptr, sheight, (sizeof sheight), height,
-          PNG_sCAL_PRECISION);
-
-      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
-   }
-}
-#  endif
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-void PNGAPI
-png_set_sCAL_fixed(png_const_structrp png_ptr, png_inforp info_ptr, int unit,
-    png_fixed_point width, png_fixed_point height)
-{
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   /* Check the arguments. */
-   if (width <= 0)
-      png_warning(png_ptr, "Invalid sCAL width ignored");
-
-   else if (height <= 0)
-      png_warning(png_ptr, "Invalid sCAL height ignored");
-
-   else
-   {
-      /* Convert 'width' and 'height' to ASCII. */
-      char swidth[PNG_sCAL_MAX_DIGITS+1];
-      char sheight[PNG_sCAL_MAX_DIGITS+1];
-
-      png_ascii_from_fixed(png_ptr, swidth, (sizeof swidth), width);
-      png_ascii_from_fixed(png_ptr, sheight, (sizeof sheight), height);
-
-      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
-   }
-}
-#  endif
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-void PNGAPI
-png_set_pHYs(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_uint_32 res_x, png_uint_32 res_y, int unit_type)
-{
-   png_debug1(1, "in %s storage function", "pHYs");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->x_pixels_per_unit = res_x;
-   info_ptr->y_pixels_per_unit = res_y;
-   info_ptr->phys_unit_type = (png_byte)unit_type;
-   info_ptr->valid |= PNG_INFO_pHYs;
-}
-#endif
-
-void PNGAPI
-png_set_PLTE(png_structrp png_ptr, png_inforp info_ptr,
-    png_const_colorp palette, int num_palette)
-{
-
-   png_uint_32 max_palette_length;
-
-   png_debug1(1, "in %s storage function", "PLTE");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   max_palette_length = (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ?
-      (1 << info_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH;
-
-   if (num_palette < 0 || num_palette > (int) max_palette_length)
-   {
-      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-         png_error(png_ptr, "Invalid palette length");
-
-      else
-      {
-         png_warning(png_ptr, "Invalid palette length");
-
-         return;
-      }
-   }
-
-   if ((num_palette > 0 && palette == NULL) ||
-      (num_palette == 0
-#        ifdef PNG_MNG_FEATURES_SUPPORTED
-            && (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0
-#        endif
-      ))
-   {
-      png_error(png_ptr, "Invalid palette");
-   }
-
-   /* It may not actually be necessary to set png_ptr->palette here;
-    * we do it for backward compatibility with the way the png_handle_tRNS
-    * function used to do the allocation.
-    *
-    * 1.6.0: the above statement appears to be incorrect; something has to set
-    * the palette inside png_struct on read.
-    */
-   png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0);
-
-   /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead
-    * of num_palette entries, in case of an invalid PNG file or incorrect
-    * call to png_set_PLTE() with too-large sample values.
-    */
-   png_ptr->palette = png_voidcast(png_colorp, png_calloc(png_ptr,
-       PNG_MAX_PALETTE_LENGTH * (sizeof (png_color))));
-
-   if (num_palette > 0)
-      memcpy(png_ptr->palette, palette, (unsigned int)num_palette *
-          (sizeof (png_color)));
-
-   info_ptr->palette = png_ptr->palette;
-   info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette;
-   info_ptr->free_me |= PNG_FREE_PLTE;
-   info_ptr->valid |= PNG_INFO_PLTE;
-}
-
-#ifdef PNG_sBIT_SUPPORTED
-void PNGAPI
-png_set_sBIT(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_color_8p sig_bit)
-{
-   png_debug1(1, "in %s storage function", "sBIT");
-
-   if (png_ptr == NULL || info_ptr == NULL || sig_bit == NULL)
-      return;
-
-   info_ptr->sig_bit = *sig_bit;
-   info_ptr->valid |= PNG_INFO_sBIT;
-}
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-void PNGAPI
-png_set_sRGB(png_const_structrp png_ptr, png_inforp info_ptr, int srgb_intent)
-{
-   png_debug1(1, "in %s storage function", "sRGB");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   (void)png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace, srgb_intent);
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-
-void PNGAPI
-png_set_sRGB_gAMA_and_cHRM(png_const_structrp png_ptr, png_inforp info_ptr,
-    int srgb_intent)
-{
-   png_debug1(1, "in %s storage function", "sRGB_gAMA_and_cHRM");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace,
-       srgb_intent) != 0)
-   {
-      /* This causes the gAMA and cHRM to be written too */
-      info_ptr->colorspace.flags |=
-         PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM;
-   }
-
-   png_colorspace_sync_info(png_ptr, info_ptr);
-}
-#endif /* sRGB */
-
-
-#ifdef PNG_iCCP_SUPPORTED
-void PNGAPI
-png_set_iCCP(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_charp name, int compression_type,
-    png_const_bytep profile, png_uint_32 proflen)
-{
-   png_charp new_iccp_name;
-   png_bytep new_iccp_profile;
-   size_t length;
-
-   png_debug1(1, "in %s storage function", "iCCP");
-
-   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL)
-      return;
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-      png_app_error(png_ptr, "Invalid iCCP compression method");
-
-   /* Set the colorspace first because this validates the profile; do not
-    * override previously set app cHRM or gAMA here (because likely as not the
-    * application knows better than libpng what the correct values are.)  Pass
-    * the info_ptr color_type field to png_colorspace_set_ICC because in the
-    * write case it has not yet been stored in png_ptr.
-    */
-   {
-      int result = png_colorspace_set_ICC(png_ptr, &info_ptr->colorspace, name,
-          proflen, profile, info_ptr->color_type);
-
-      png_colorspace_sync_info(png_ptr, info_ptr);
-
-      /* Don't do any of the copying if the profile was bad, or inconsistent. */
-      if (result == 0)
-         return;
-
-      /* But do write the gAMA and cHRM chunks from the profile. */
-      info_ptr->colorspace.flags |=
-         PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM;
-   }
-
-   length = strlen(name)+1;
-   new_iccp_name = png_voidcast(png_charp, png_malloc_warn(png_ptr, length));
-
-   if (new_iccp_name == NULL)
-   {
-      png_benign_error(png_ptr, "Insufficient memory to process iCCP chunk");
-
-      return;
-   }
-
-   memcpy(new_iccp_name, name, length);
-   new_iccp_profile = png_voidcast(png_bytep,
-       png_malloc_warn(png_ptr, proflen));
-
-   if (new_iccp_profile == NULL)
-   {
-      png_free(png_ptr, new_iccp_name);
-      png_benign_error(png_ptr,
-          "Insufficient memory to process iCCP profile");
-
-      return;
-   }
-
-   memcpy(new_iccp_profile, profile, proflen);
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0);
-
-   info_ptr->iccp_proflen = proflen;
-   info_ptr->iccp_name = new_iccp_name;
-   info_ptr->iccp_profile = new_iccp_profile;
-   info_ptr->free_me |= PNG_FREE_ICCP;
-   info_ptr->valid |= PNG_INFO_iCCP;
-}
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-void PNGAPI
-png_set_text(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_textp text_ptr, int num_text)
-{
-   int ret;
-   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, num_text);
-
-   if (ret != 0)
-      png_error(png_ptr, "Insufficient memory to store text");
-}
-
-int /* PRIVATE */
-png_set_text_2(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_textp text_ptr, int num_text)
-{
-   int i;
-
-   png_debug1(1, "in text storage function, chunk typeid = 0x%lx",
-      png_ptr == NULL ? 0xabadca11UL : (unsigned long)png_ptr->chunk_name);
-
-   if (png_ptr == NULL || info_ptr == NULL || num_text <= 0 || text_ptr == NULL)
-      return 0;
-
-   /* Make sure we have enough space in the "text" array in info_struct
-    * to hold all of the incoming text_ptr objects.  This compare can't overflow
-    * because max_text >= num_text (anyway, subtract of two positive integers
-    * can't overflow in any case.)
-    */
-   if (num_text > info_ptr->max_text - info_ptr->num_text)
-   {
-      int old_num_text = info_ptr->num_text;
-      int max_text;
-      png_textp new_text = NULL;
-
-      /* Calculate an appropriate max_text, checking for overflow. */
-      max_text = old_num_text;
-      if (num_text <= INT_MAX - max_text)
-      {
-         max_text += num_text;
-
-         /* Round up to a multiple of 8 */
-         if (max_text < INT_MAX-8)
-            max_text = (max_text + 8) & ~0x7;
-
-         else
-            max_text = INT_MAX;
-
-         /* Now allocate a new array and copy the old members in; this does all
-          * the overflow checks.
-          */
-         new_text = png_voidcast(png_textp,png_realloc_array(png_ptr,
-             info_ptr->text, old_num_text, max_text-old_num_text,
-             sizeof *new_text));
-      }
-
-      if (new_text == NULL)
-      {
-         png_chunk_report(png_ptr, "too many text chunks",
-             PNG_CHUNK_WRITE_ERROR);
-
-         return 1;
-      }
-
-      png_free(png_ptr, info_ptr->text);
-
-      info_ptr->text = new_text;
-      info_ptr->free_me |= PNG_FREE_TEXT;
-      info_ptr->max_text = max_text;
-      /* num_text is adjusted below as the entries are copied in */
-
-      png_debug1(3, "allocated %d entries for info_ptr->text", max_text);
-   }
-
-   for (i = 0; i < num_text; i++)
-   {
-      size_t text_length, key_len;
-      size_t lang_len, lang_key_len;
-      png_textp textp = &(info_ptr->text[info_ptr->num_text]);
-
-      if (text_ptr[i].key == NULL)
-          continue;
-
-      if (text_ptr[i].compression < PNG_TEXT_COMPRESSION_NONE ||
-          text_ptr[i].compression >= PNG_TEXT_COMPRESSION_LAST)
-      {
-         png_chunk_report(png_ptr, "text compression mode is out of range",
-             PNG_CHUNK_WRITE_ERROR);
-         continue;
-      }
-
-      key_len = strlen(text_ptr[i].key);
-
-      if (text_ptr[i].compression <= 0)
-      {
-         lang_len = 0;
-         lang_key_len = 0;
-      }
-
-      else
-#  ifdef PNG_iTXt_SUPPORTED
-      {
-         /* Set iTXt data */
-
-         if (text_ptr[i].lang != NULL)
-            lang_len = strlen(text_ptr[i].lang);
-
-         else
-            lang_len = 0;
-
-         if (text_ptr[i].lang_key != NULL)
-            lang_key_len = strlen(text_ptr[i].lang_key);
-
-         else
-            lang_key_len = 0;
-      }
-#  else /* iTXt */
-      {
-         png_chunk_report(png_ptr, "iTXt chunk not supported",
-             PNG_CHUNK_WRITE_ERROR);
-         continue;
-      }
-#  endif
-
-      if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0')
-      {
-         text_length = 0;
-#  ifdef PNG_iTXt_SUPPORTED
-         if (text_ptr[i].compression > 0)
-            textp->compression = PNG_ITXT_COMPRESSION_NONE;
-
-         else
-#  endif
-            textp->compression = PNG_TEXT_COMPRESSION_NONE;
-      }
-
-      else
-      {
-         text_length = strlen(text_ptr[i].text);
-         textp->compression = text_ptr[i].compression;
-      }
-
-      textp->key = png_voidcast(png_charp,png_malloc_base(png_ptr,
-          key_len + text_length + lang_len + lang_key_len + 4));
-
-      if (textp->key == NULL)
-      {
-         png_chunk_report(png_ptr, "text chunk: out of memory",
-             PNG_CHUNK_WRITE_ERROR);
-
-         return 1;
-      }
-
-      png_debug2(2, "Allocated %lu bytes at %p in png_set_text",
-          (unsigned long)(png_uint_32)
-          (key_len + lang_len + lang_key_len + text_length + 4),
-          textp->key);
-
-      memcpy(textp->key, text_ptr[i].key, key_len);
-      *(textp->key + key_len) = '\0';
-
-      if (text_ptr[i].compression > 0)
-      {
-         textp->lang = textp->key + key_len + 1;
-         memcpy(textp->lang, text_ptr[i].lang, lang_len);
-         *(textp->lang + lang_len) = '\0';
-         textp->lang_key = textp->lang + lang_len + 1;
-         memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
-         *(textp->lang_key + lang_key_len) = '\0';
-         textp->text = textp->lang_key + lang_key_len + 1;
-      }
-
-      else
-      {
-         textp->lang=NULL;
-         textp->lang_key=NULL;
-         textp->text = textp->key + key_len + 1;
-      }
-
-      if (text_length != 0)
-         memcpy(textp->text, text_ptr[i].text, text_length);
-
-      *(textp->text + text_length) = '\0';
-
-#  ifdef PNG_iTXt_SUPPORTED
-      if (textp->compression > 0)
-      {
-         textp->text_length = 0;
-         textp->itxt_length = text_length;
-      }
-
-      else
-#  endif
-      {
-         textp->text_length = text_length;
-         textp->itxt_length = 0;
-      }
-
-      info_ptr->num_text++;
-      png_debug1(3, "transferred text chunk %d", info_ptr->num_text);
-   }
-
-   return 0;
-}
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-void PNGAPI
-png_set_tIME(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_const_timep mod_time)
-{
-   png_debug1(1, "in %s storage function", "tIME");
-
-   if (png_ptr == NULL || info_ptr == NULL || mod_time == NULL ||
-       (png_ptr->mode & PNG_WROTE_tIME) != 0)
-      return;
-
-   if (mod_time->month == 0   || mod_time->month > 12  ||
-       mod_time->day   == 0   || mod_time->day   > 31  ||
-       mod_time->hour  > 23   || mod_time->minute > 59 ||
-       mod_time->second > 60)
-   {
-      png_warning(png_ptr, "Ignoring invalid time value");
-
-      return;
-   }
-
-   info_ptr->mod_time = *mod_time;
-   info_ptr->valid |= PNG_INFO_tIME;
-}
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-void PNGAPI
-png_set_tRNS(png_structrp png_ptr, png_inforp info_ptr,
-    png_const_bytep trans_alpha, int num_trans, png_const_color_16p trans_color)
-{
-   png_debug1(1, "in %s storage function", "tRNS");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-
-      return;
-
-   if (trans_alpha != NULL)
-   {
-       /* It may not actually be necessary to set png_ptr->trans_alpha here;
-        * we do it for backward compatibility with the way the png_handle_tRNS
-        * function used to do the allocation.
-        *
-        * 1.6.0: The above statement is incorrect; png_handle_tRNS effectively
-        * relies on png_set_tRNS storing the information in png_struct
-        * (otherwise it won't be there for the code in pngrtran.c).
-        */
-
-       png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0);
-
-       if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH)
-       {
-         /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */
-          info_ptr->trans_alpha = png_voidcast(png_bytep,
-              png_malloc(png_ptr, PNG_MAX_PALETTE_LENGTH));
-          memcpy(info_ptr->trans_alpha, trans_alpha, (size_t)num_trans);
-
-          info_ptr->free_me |= PNG_FREE_TRNS;
-          info_ptr->valid |= PNG_INFO_tRNS;
-       }
-       png_ptr->trans_alpha = info_ptr->trans_alpha;
-   }
-
-   if (trans_color != NULL)
-   {
-#ifdef PNG_WARNINGS_SUPPORTED
-      if (info_ptr->bit_depth < 16)
-      {
-         int sample_max = (1 << info_ptr->bit_depth) - 1;
-
-         if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY &&
-             trans_color->gray > sample_max) ||
-             (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
-             (trans_color->red > sample_max ||
-             trans_color->green > sample_max ||
-             trans_color->blue > sample_max)))
-            png_warning(png_ptr,
-                "tRNS chunk has out-of-range samples for bit_depth");
-      }
-#endif
-
-      info_ptr->trans_color = *trans_color;
-
-      if (num_trans == 0)
-         num_trans = 1;
-   }
-
-   info_ptr->num_trans = (png_uint_16)num_trans;
-
-   if (num_trans != 0)
-   {
-      info_ptr->free_me |= PNG_FREE_TRNS;
-      info_ptr->valid |= PNG_INFO_tRNS;
-   }
-}
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-void PNGAPI
-png_set_sPLT(png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_sPLT_tp entries, int nentries)
-/*
- *  entries        - array of png_sPLT_t structures
- *                   to be added to the list of palettes
- *                   in the info structure.
- *
- *  nentries       - number of palette structures to be
- *                   added.
- */
-{
-   png_sPLT_tp np;
-
-   png_debug1(1, "in %s storage function", "sPLT");
-
-   if (png_ptr == NULL || info_ptr == NULL || nentries <= 0 || entries == NULL)
-      return;
-
-   /* Use the internal realloc function, which checks for all the possible
-    * overflows.  Notice that the parameters are (int) and (size_t)
-    */
-   np = png_voidcast(png_sPLT_tp,png_realloc_array(png_ptr,
-       info_ptr->splt_palettes, info_ptr->splt_palettes_num, nentries,
-       sizeof *np));
-
-   if (np == NULL)
-   {
-      /* Out of memory or too many chunks */
-      png_chunk_report(png_ptr, "too many sPLT chunks", PNG_CHUNK_WRITE_ERROR);
-      return;
-   }
-
-   png_free(png_ptr, info_ptr->splt_palettes);
-
-   info_ptr->splt_palettes = np;
-   info_ptr->free_me |= PNG_FREE_SPLT;
-
-   np += info_ptr->splt_palettes_num;
-
-   do
-   {
-      size_t length;
-
-      /* Skip invalid input entries */
-      if (entries->name == NULL || entries->entries == NULL)
-      {
-         /* png_handle_sPLT doesn't do this, so this is an app error */
-         png_app_error(png_ptr, "png_set_sPLT: invalid sPLT");
-         /* Just skip the invalid entry */
-         continue;
-      }
-
-      np->depth = entries->depth;
-
-      /* In the event of out-of-memory just return - there's no point keeping
-       * on trying to add sPLT chunks.
-       */
-      length = strlen(entries->name) + 1;
-      np->name = png_voidcast(png_charp, png_malloc_base(png_ptr, length));
-
-      if (np->name == NULL)
-         break;
-
-      memcpy(np->name, entries->name, length);
-
-      /* IMPORTANT: we have memory now that won't get freed if something else
-       * goes wrong; this code must free it.  png_malloc_array produces no
-       * warnings; use a png_chunk_report (below) if there is an error.
-       */
-      np->entries = png_voidcast(png_sPLT_entryp, png_malloc_array(png_ptr,
-          entries->nentries, sizeof (png_sPLT_entry)));
-
-      if (np->entries == NULL)
-      {
-         png_free(png_ptr, np->name);
-         np->name = NULL;
-         break;
-      }
-
-      np->nentries = entries->nentries;
-      /* This multiply can't overflow because png_malloc_array has already
-       * checked it when doing the allocation.
-       */
-      memcpy(np->entries, entries->entries,
-          (unsigned int)entries->nentries * sizeof (png_sPLT_entry));
-
-      /* Note that 'continue' skips the advance of the out pointer and out
-       * count, so an invalid entry is not added.
-       */
-      info_ptr->valid |= PNG_INFO_sPLT;
-      ++(info_ptr->splt_palettes_num);
-      ++np;
-      ++entries;
-   }
-   while (--nentries);
-
-   if (nentries > 0)
-      png_chunk_report(png_ptr, "sPLT out of memory", PNG_CHUNK_WRITE_ERROR);
-}
-#endif /* sPLT */
-
-#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
-static png_byte
-check_location(png_const_structrp png_ptr, int location)
-{
-   location &= (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT);
-
-   /* New in 1.6.0; copy the location and check it.  This is an API
-    * change; previously the app had to use the
-    * png_set_unknown_chunk_location API below for each chunk.
-    */
-   if (location == 0 && (png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
-   {
-      /* Write struct, so unknown chunks come from the app */
-      png_app_warning(png_ptr,
-          "png_set_unknown_chunks now expects a valid location");
-      /* Use the old behavior */
-      location = (png_byte)(png_ptr->mode &
-          (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT));
-   }
-
-   /* This need not be an internal error - if the app calls
-    * png_set_unknown_chunks on a read pointer it must get the location right.
-    */
-   if (location == 0)
-      png_error(png_ptr, "invalid location in png_set_unknown_chunks");
-
-   /* Now reduce the location to the top-most set bit by removing each least
-    * significant bit in turn.
-    */
-   while (location != (location & -location))
-      location &= ~(location & -location);
-
-   /* The cast is safe because 'location' is a bit mask and only the low four
-    * bits are significant.
-    */
-   return (png_byte)location;
-}
-
-void PNGAPI
-png_set_unknown_chunks(png_const_structrp png_ptr,
-    png_inforp info_ptr, png_const_unknown_chunkp unknowns, int num_unknowns)
-{
-   png_unknown_chunkp np;
-
-   if (png_ptr == NULL || info_ptr == NULL || num_unknowns <= 0 ||
-       unknowns == NULL)
-      return;
-
-   /* Check for the failure cases where support has been disabled at compile
-    * time.  This code is hardly ever compiled - it's here because
-    * STORE_UNKNOWN_CHUNKS is set by both read and write code (compiling in this
-    * code) but may be meaningless if the read or write handling of unknown
-    * chunks is not compiled in.
-    */
-#  if !defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) && \
-      defined(PNG_READ_SUPPORTED)
-      if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
-      {
-         png_app_error(png_ptr, "no unknown chunk support on read");
-
-         return;
-      }
-#  endif
-#  if !defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED) && \
-      defined(PNG_WRITE_SUPPORTED)
-      if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
-      {
-         png_app_error(png_ptr, "no unknown chunk support on write");
-
-         return;
-      }
-#  endif
-
-   /* Prior to 1.6.0 this code used png_malloc_warn; however, this meant that
-    * unknown critical chunks could be lost with just a warning resulting in
-    * undefined behavior.  Now png_chunk_report is used to provide behavior
-    * appropriate to read or write.
-    */
-   np = png_voidcast(png_unknown_chunkp, png_realloc_array(png_ptr,
-       info_ptr->unknown_chunks, info_ptr->unknown_chunks_num, num_unknowns,
-       sizeof *np));
-
-   if (np == NULL)
-   {
-      png_chunk_report(png_ptr, "too many unknown chunks",
-          PNG_CHUNK_WRITE_ERROR);
-      return;
-   }
-
-   png_free(png_ptr, info_ptr->unknown_chunks);
-
-   info_ptr->unknown_chunks = np; /* safe because it is initialized */
-   info_ptr->free_me |= PNG_FREE_UNKN;
-
-   np += info_ptr->unknown_chunks_num;
-
-   /* Increment unknown_chunks_num each time round the loop to protect the
-    * just-allocated chunk data.
-    */
-   for (; num_unknowns > 0; --num_unknowns, ++unknowns)
-   {
-      memcpy(np->name, unknowns->name, (sizeof np->name));
-      np->name[(sizeof np->name)-1] = '\0';
-      np->location = check_location(png_ptr, unknowns->location);
-
-      if (unknowns->size == 0)
-      {
-         np->data = NULL;
-         np->size = 0;
-      }
-
-      else
-      {
-         np->data = png_voidcast(png_bytep,
-             png_malloc_base(png_ptr, unknowns->size));
-
-         if (np->data == NULL)
-         {
-            png_chunk_report(png_ptr, "unknown chunk: out of memory",
-                PNG_CHUNK_WRITE_ERROR);
-            /* But just skip storing the unknown chunk */
-            continue;
-         }
-
-         memcpy(np->data, unknowns->data, unknowns->size);
-         np->size = unknowns->size;
-      }
-
-      /* These increments are skipped on out-of-memory for the data - the
-       * unknown chunk entry gets overwritten if the png_chunk_report returns.
-       * This is correct in the read case (the chunk is just dropped.)
-       */
-      ++np;
-      ++(info_ptr->unknown_chunks_num);
-   }
-}
-
-void PNGAPI
-png_set_unknown_chunk_location(png_const_structrp png_ptr, png_inforp info_ptr,
-    int chunk, int location)
-{
-   /* This API is pretty pointless in 1.6.0 because the location can be set
-    * before the call to png_set_unknown_chunks.
-    *
-    * TODO: add a png_app_warning in 1.7
-    */
-   if (png_ptr != NULL && info_ptr != NULL && chunk >= 0 &&
-      chunk < info_ptr->unknown_chunks_num)
-   {
-      if ((location & (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT)) == 0)
-      {
-         png_app_error(png_ptr, "invalid unknown chunk location");
-         /* Fake out the pre 1.6.0 behavior: */
-         if (((unsigned int)location & PNG_HAVE_IDAT) != 0) /* undocumented! */
-            location = PNG_AFTER_IDAT;
-
-         else
-            location = PNG_HAVE_IHDR; /* also undocumented */
-      }
-
-      info_ptr->unknown_chunks[chunk].location =
-         check_location(png_ptr, location);
-   }
-}
-#endif /* STORE_UNKNOWN_CHUNKS */
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-png_uint_32 PNGAPI
-png_permit_mng_features(png_structrp png_ptr, png_uint_32 mng_features)
-{
-   png_debug(1, "in png_permit_mng_features");
-
-   if (png_ptr == NULL)
-      return 0;
-
-   png_ptr->mng_features_permitted = mng_features & PNG_ALL_MNG_FEATURES;
-
-   return png_ptr->mng_features_permitted;
-}
-#endif
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-static unsigned int
-add_one_chunk(png_bytep list, unsigned int count, png_const_bytep add, int keep)
-{
-   unsigned int i;
-
-   /* Utility function: update the 'keep' state of a chunk if it is already in
-    * the list, otherwise add it to the list.
-    */
-   for (i=0; i<count; ++i, list += 5)
-   {
-      if (memcmp(list, add, 4) == 0)
-      {
-         list[4] = (png_byte)keep;
-
-         return count;
-      }
-   }
-
-   if (keep != PNG_HANDLE_CHUNK_AS_DEFAULT)
-   {
-      ++count;
-      memcpy(list, add, 4);
-      list[4] = (png_byte)keep;
-   }
-
-   return count;
-}
-
-void PNGAPI
-png_set_keep_unknown_chunks(png_structrp png_ptr, int keep,
-    png_const_bytep chunk_list, int num_chunks_in)
-{
-   png_bytep new_list;
-   unsigned int num_chunks, old_num_chunks;
-
-   if (png_ptr == NULL)
-      return;
-
-   if (keep < 0 || keep >= PNG_HANDLE_CHUNK_LAST)
-   {
-      png_app_error(png_ptr, "png_set_keep_unknown_chunks: invalid keep");
-
-      return;
-   }
-
-   if (num_chunks_in <= 0)
-   {
-      png_ptr->unknown_default = keep;
-
-      /* '0' means just set the flags, so stop here */
-      if (num_chunks_in == 0)
-        return;
-   }
-
-   if (num_chunks_in < 0)
-   {
-      /* Ignore all unknown chunks and all chunks recognized by
-       * libpng except for IHDR, PLTE, tRNS, IDAT, and IEND
-       */
-      static const png_byte chunks_to_ignore[] = {
-         98,  75,  71,  68, '\0',  /* bKGD */
-         99,  72,  82,  77, '\0',  /* cHRM */
-        101,  88,  73, 102, '\0',  /* eXIf */
-        103,  65,  77,  65, '\0',  /* gAMA */
-        104,  73,  83,  84, '\0',  /* hIST */
-        105,  67,  67,  80, '\0',  /* iCCP */
-        105,  84,  88, 116, '\0',  /* iTXt */
-        111,  70,  70, 115, '\0',  /* oFFs */
-        112,  67,  65,  76, '\0',  /* pCAL */
-        112,  72,  89, 115, '\0',  /* pHYs */
-        115,  66,  73,  84, '\0',  /* sBIT */
-        115,  67,  65,  76, '\0',  /* sCAL */
-        115,  80,  76,  84, '\0',  /* sPLT */
-        115,  84,  69,  82, '\0',  /* sTER */
-        115,  82,  71,  66, '\0',  /* sRGB */
-        116,  69,  88, 116, '\0',  /* tEXt */
-        116,  73,  77,  69, '\0',  /* tIME */
-        122,  84,  88, 116, '\0'   /* zTXt */
-      };
-
-      chunk_list = chunks_to_ignore;
-      num_chunks = (unsigned int)/*SAFE*/(sizeof chunks_to_ignore)/5U;
-   }
-
-   else /* num_chunks_in > 0 */
-   {
-      if (chunk_list == NULL)
-      {
-         /* Prior to 1.6.0 this was silently ignored, now it is an app_error
-          * which can be switched off.
-          */
-         png_app_error(png_ptr, "png_set_keep_unknown_chunks: no chunk list");
-
-         return;
-      }
-
-      num_chunks = (unsigned int)num_chunks_in;
-   }
-
-   old_num_chunks = png_ptr->num_chunk_list;
-   if (png_ptr->chunk_list == NULL)
-      old_num_chunks = 0;
-
-   /* Since num_chunks is always restricted to UINT_MAX/5 this can't overflow.
-    */
-   if (num_chunks + old_num_chunks > UINT_MAX/5)
-   {
-      png_app_error(png_ptr, "png_set_keep_unknown_chunks: too many chunks");
-
-      return;
-   }
-
-   /* If these chunks are being reset to the default then no more memory is
-    * required because add_one_chunk above doesn't extend the list if the 'keep'
-    * parameter is the default.
-    */
-   if (keep != 0)
-   {
-      new_list = png_voidcast(png_bytep, png_malloc(png_ptr,
-          5 * (num_chunks + old_num_chunks)));
-
-      if (old_num_chunks > 0)
-         memcpy(new_list, png_ptr->chunk_list, 5*old_num_chunks);
-   }
-
-   else if (old_num_chunks > 0)
-      new_list = png_ptr->chunk_list;
-
-   else
-      new_list = NULL;
-
-   /* Add the new chunks together with each one's handling code.  If the chunk
-    * already exists the code is updated, otherwise the chunk is added to the
-    * end.  (In libpng 1.6.0 order no longer matters because this code enforces
-    * the earlier convention that the last setting is the one that is used.)
-    */
-   if (new_list != NULL)
-   {
-      png_const_bytep inlist;
-      png_bytep outlist;
-      unsigned int i;
-
-      for (i=0; i<num_chunks; ++i)
-      {
-         old_num_chunks = add_one_chunk(new_list, old_num_chunks,
-             chunk_list+5*i, keep);
-      }
-
-      /* Now remove any spurious 'default' entries. */
-      num_chunks = 0;
-      for (i=0, inlist=outlist=new_list; i<old_num_chunks; ++i, inlist += 5)
-      {
-         if (inlist[4])
-         {
-            if (outlist != inlist)
-               memcpy(outlist, inlist, 5);
-            outlist += 5;
-            ++num_chunks;
-         }
-      }
-
-      /* This means the application has removed all the specialized handling. */
-      if (num_chunks == 0)
-      {
-         if (png_ptr->chunk_list != new_list)
-            png_free(png_ptr, new_list);
-
-         new_list = NULL;
-      }
-   }
-
-   else
-      num_chunks = 0;
-
-   png_ptr->num_chunk_list = num_chunks;
-
-   if (png_ptr->chunk_list != new_list)
-   {
-      if (png_ptr->chunk_list != NULL)
-         png_free(png_ptr, png_ptr->chunk_list);
-
-      png_ptr->chunk_list = new_list;
-   }
-}
-#endif
-
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-void PNGAPI
-png_set_read_user_chunk_fn(png_structrp png_ptr, png_voidp user_chunk_ptr,
-    png_user_chunk_ptr read_user_chunk_fn)
-{
-   png_debug(1, "in png_set_read_user_chunk_fn");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->read_user_chunk_fn = read_user_chunk_fn;
-   png_ptr->user_chunk_ptr = user_chunk_ptr;
-}
-#endif
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_set_rows(png_const_structrp png_ptr, png_inforp info_ptr,
-    png_bytepp row_pointers)
-{
-   png_debug(1, "in png_set_rows");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (info_ptr->row_pointers != NULL &&
-       (info_ptr->row_pointers != row_pointers))
-      png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
-
-   info_ptr->row_pointers = row_pointers;
-
-   if (row_pointers != NULL)
-      info_ptr->valid |= PNG_INFO_IDAT;
-}
-#endif
-
-void PNGAPI
-png_set_compression_buffer_size(png_structrp png_ptr, size_t size)
-{
-   png_debug(1, "in png_set_compression_buffer_size");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (size == 0 || size > PNG_UINT_31_MAX)
-      png_error(png_ptr, "invalid compression buffer size");
-
-#  ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
-   {
-      png_ptr->IDAT_read_size = (png_uint_32)size; /* checked above */
-      return;
-   }
-#  endif
-
-#  ifdef PNG_WRITE_SUPPORTED
-   if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
-   {
-      if (png_ptr->zowner != 0)
-      {
-         png_warning(png_ptr,
-             "Compression buffer size cannot be changed because it is in use");
-
-         return;
-      }
-
-#ifndef __COVERITY__
-      /* Some compilers complain that this is always false.  However, it
-       * can be true when integer overflow happens.
-       */
-      if (size > ZLIB_IO_MAX)
-      {
-         png_warning(png_ptr,
-             "Compression buffer size limited to system maximum");
-         size = ZLIB_IO_MAX; /* must fit */
-      }
-#endif
-
-      if (size < 6)
-      {
-         /* Deflate will potentially go into an infinite loop on a SYNC_FLUSH
-          * if this is permitted.
-          */
-         png_warning(png_ptr,
-             "Compression buffer size cannot be reduced below 6");
-
-         return;
-      }
-
-      if (png_ptr->zbuffer_size != size)
-      {
-         png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
-         png_ptr->zbuffer_size = (uInt)size;
-      }
-   }
-#  endif
-}
-
-void PNGAPI
-png_set_invalid(png_const_structrp png_ptr, png_inforp info_ptr, int mask)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      info_ptr->valid &= (unsigned int)(~mask);
-}
-
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-/* This function was added to libpng 1.2.6 */
-void PNGAPI
-png_set_user_limits(png_structrp png_ptr, png_uint_32 user_width_max,
-    png_uint_32 user_height_max)
-{
-   png_debug(1, "in png_set_user_limits");
-
-   /* Images with dimensions larger than these limits will be
-    * rejected by png_set_IHDR().  To accept any PNG datastream
-    * regardless of dimensions, set both limits to 0x7fffffff.
-    */
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->user_width_max = user_width_max;
-   png_ptr->user_height_max = user_height_max;
-}
-
-/* This function was added to libpng 1.4.0 */
-void PNGAPI
-png_set_chunk_cache_max(png_structrp png_ptr, png_uint_32 user_chunk_cache_max)
-{
-   png_debug(1, "in png_set_chunk_cache_max");
-
-   if (png_ptr != NULL)
-      png_ptr->user_chunk_cache_max = user_chunk_cache_max;
-}
-
-/* This function was added to libpng 1.4.1 */
-void PNGAPI
-png_set_chunk_malloc_max(png_structrp png_ptr,
-    png_alloc_size_t user_chunk_malloc_max)
-{
-   png_debug(1, "in png_set_chunk_malloc_max");
-
-   if (png_ptr != NULL)
-      png_ptr->user_chunk_malloc_max = user_chunk_malloc_max;
-}
-#endif /* ?SET_USER_LIMITS */
-
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-void PNGAPI
-png_set_benign_errors(png_structrp png_ptr, int allowed)
-{
-   png_debug(1, "in png_set_benign_errors");
-
-   /* If allowed is 1, png_benign_error() is treated as a warning.
-    *
-    * If allowed is 0, png_benign_error() is treated as an error (which
-    * is the default behavior if png_set_benign_errors() is not called).
-    */
-
-   if (allowed != 0)
-      png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN |
-         PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN;
-
-   else
-      png_ptr->flags &= ~(PNG_FLAG_BENIGN_ERRORS_WARN |
-         PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN);
-}
-#endif /* BENIGN_ERRORS */
-
-#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Whether to report invalid palette index; added at libng-1.5.10.
-    * It is possible for an indexed (color-type==3) PNG file to contain
-    * pixels with invalid (out-of-range) indexes if the PLTE chunk has
-    * fewer entries than the image's bit-depth would allow. We recover
-    * from this gracefully by filling any incomplete palette with zeros
-    * (opaque black).  By default, when this occurs libpng will issue
-    * a benign error.  This API can be used to override that behavior.
-    */
-void PNGAPI
-png_set_check_for_invalid_index(png_structrp png_ptr, int allowed)
-{
-   png_debug(1, "in png_set_check_for_invalid_index");
-
-   if (allowed > 0)
-      png_ptr->num_palette_max = 0;
-
-   else
-      png_ptr->num_palette_max = -1;
-}
-#endif
-
-#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) || \
-    defined(PNG_iCCP_SUPPORTED) || defined(PNG_sPLT_SUPPORTED)
-/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification,
- * and if invalid, correct the keyword rather than discarding the entire
- * chunk.  The PNG 1.0 specification requires keywords 1-79 characters in
- * length, forbids leading or trailing whitespace, multiple internal spaces,
- * and the non-break space (0x80) from ISO 8859-1.  Returns keyword length.
- *
- * The 'new_key' buffer must be 80 characters in size (for the keyword plus a
- * trailing '\0').  If this routine returns 0 then there was no keyword, or a
- * valid one could not be generated, and the caller must png_error.
- */
-png_uint_32 /* PRIVATE */
-png_check_keyword(png_structrp png_ptr, png_const_charp key, png_bytep new_key)
-{
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_const_charp orig_key = key;
-#endif
-   png_uint_32 key_len = 0;
-   int bad_character = 0;
-   int space = 1;
-
-   png_debug(1, "in png_check_keyword");
-
-   if (key == NULL)
-   {
-      *new_key = 0;
-      return 0;
-   }
-
-   while (*key && key_len < 79)
-   {
-      png_byte ch = (png_byte)*key++;
-
-      if ((ch > 32 && ch <= 126) || (ch >= 161 /*&& ch <= 255*/))
-      {
-         *new_key++ = ch; ++key_len; space = 0;
-      }
-
-      else if (space == 0)
-      {
-         /* A space or an invalid character when one wasn't seen immediately
-          * before; output just a space.
-          */
-         *new_key++ = 32; ++key_len; space = 1;
-
-         /* If the character was not a space then it is invalid. */
-         if (ch != 32)
-            bad_character = ch;
-      }
-
-      else if (bad_character == 0)
-         bad_character = ch; /* just skip it, record the first error */
-   }
-
-   if (key_len > 0 && space != 0) /* trailing space */
-   {
-      --key_len; --new_key;
-      if (bad_character == 0)
-         bad_character = 32;
-   }
-
-   /* Terminate the keyword */
-   *new_key = 0;
-
-   if (key_len == 0)
-      return 0;
-
-#ifdef PNG_WARNINGS_SUPPORTED
-   /* Try to only output one warning per keyword: */
-   if (*key != 0) /* keyword too long */
-      png_warning(png_ptr, "keyword truncated");
-
-   else if (bad_character != 0)
-   {
-      PNG_WARNING_PARAMETERS(p)
-
-      png_warning_parameter(p, 1, orig_key);
-      png_warning_parameter_signed(p, 2, PNG_NUMBER_FORMAT_02x, bad_character);
-
-      png_formatted_warning(png_ptr, p, "keyword \"@1\": bad character '0x@2'");
-   }
-#else /* !WARNINGS */
-   PNG_UNUSED(png_ptr)
-#endif /* !WARNINGS */
-
-   return key_len;
-}
-#endif /* TEXT || pCAL || iCCP || sPLT */
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pngstruct.h b/dep/libpng/src/pngstruct.h
deleted file mode 100644
index e591d94d5..000000000
--- a/dep/libpng/src/pngstruct.h
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/* pngstruct.h - header file for PNG reference library
- *
- * Copyright (c) 2018-2022 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* The structure that holds the information to read and write PNG files.
- * The only people who need to care about what is inside of this are the
- * people who will be modifying the library for their own special needs.
- * It should NOT be accessed directly by an application.
- */
-
-#ifndef PNGSTRUCT_H
-#define PNGSTRUCT_H
-/* zlib.h defines the structure z_stream, an instance of which is included
- * in this structure and is required for decompressing the LZ compressed
- * data in PNG files.
- */
-#ifndef ZLIB_CONST
-   /* We must ensure that zlib uses 'const' in declarations. */
-#  define ZLIB_CONST
-#endif
-#include "zlib.h"
-#ifdef const
-   /* zlib.h sometimes #defines const to nothing, undo this. */
-#  undef const
-#endif
-
-/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility
- * with older builds.
- */
-#if ZLIB_VERNUM < 0x1260
-#  define PNGZ_MSG_CAST(s) png_constcast(char*,s)
-#  define PNGZ_INPUT_CAST(b) png_constcast(png_bytep,b)
-#else
-#  define PNGZ_MSG_CAST(s) (s)
-#  define PNGZ_INPUT_CAST(b) (b)
-#endif
-
-/* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib
- * can handle at once.  This type need be no larger than 16 bits (so maximum of
- * 65535), this define allows us to discover how big it is, but limited by the
- * maximum for size_t.  The value can be overridden in a library build
- * (pngusr.h, or set it in CPPFLAGS) and it works to set it to a considerably
- * lower value (e.g. 255 works).  A lower value may help memory usage (slightly)
- * and may even improve performance on some systems (and degrade it on others.)
- */
-#ifndef ZLIB_IO_MAX
-#  define ZLIB_IO_MAX ((uInt)-1)
-#endif
-
-#ifdef PNG_WRITE_SUPPORTED
-/* The type of a compression buffer list used by the write code. */
-typedef struct png_compression_buffer
-{
-   struct png_compression_buffer *next;
-   png_byte                       output[1]; /* actually zbuf_size */
-} png_compression_buffer, *png_compression_bufferp;
-
-#define PNG_COMPRESSION_BUFFER_SIZE(pp)\
-   (offsetof(png_compression_buffer, output) + (pp)->zbuffer_size)
-#endif
-
-/* Colorspace support; structures used in png_struct, png_info and in internal
- * functions to hold and communicate information about the color space.
- *
- * PNG_COLORSPACE_SUPPORTED is only required if the application will perform
- * colorspace corrections, otherwise all the colorspace information can be
- * skipped and the size of libpng can be reduced (significantly) by compiling
- * out the colorspace support.
- */
-#ifdef PNG_COLORSPACE_SUPPORTED
-/* The chromaticities of the red, green and blue colorants and the chromaticity
- * of the corresponding white point (i.e. of rgb(1.0,1.0,1.0)).
- */
-typedef struct png_xy
-{
-   png_fixed_point redx, redy;
-   png_fixed_point greenx, greeny;
-   png_fixed_point bluex, bluey;
-   png_fixed_point whitex, whitey;
-} png_xy;
-
-/* The same data as above but encoded as CIE XYZ values.  When this data comes
- * from chromaticities the sum of the Y values is assumed to be 1.0
- */
-typedef struct png_XYZ
-{
-   png_fixed_point red_X, red_Y, red_Z;
-   png_fixed_point green_X, green_Y, green_Z;
-   png_fixed_point blue_X, blue_Y, blue_Z;
-} png_XYZ;
-#endif /* COLORSPACE */
-
-#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
-/* A colorspace is all the above plus, potentially, profile information;
- * however at present libpng does not use the profile internally so it is only
- * stored in the png_info struct (if iCCP is supported.)  The rendering intent
- * is retained here and is checked.
- *
- * The file gamma encoding information is also stored here and gamma correction
- * is done by libpng, whereas color correction must currently be done by the
- * application.
- */
-typedef struct png_colorspace
-{
-#ifdef PNG_GAMMA_SUPPORTED
-   png_fixed_point gamma;        /* File gamma */
-#endif
-
-#ifdef PNG_COLORSPACE_SUPPORTED
-   png_xy      end_points_xy;    /* End points as chromaticities */
-   png_XYZ     end_points_XYZ;   /* End points as CIE XYZ colorant values */
-   png_uint_16 rendering_intent; /* Rendering intent of a profile */
-#endif
-
-   /* Flags are always defined to simplify the code. */
-   png_uint_16 flags;            /* As defined below */
-} png_colorspace, * PNG_RESTRICT png_colorspacerp;
-
-typedef const png_colorspace * PNG_RESTRICT png_const_colorspacerp;
-
-/* General flags for the 'flags' field */
-#define PNG_COLORSPACE_HAVE_GAMMA           0x0001
-#define PNG_COLORSPACE_HAVE_ENDPOINTS       0x0002
-#define PNG_COLORSPACE_HAVE_INTENT          0x0004
-#define PNG_COLORSPACE_FROM_gAMA            0x0008
-#define PNG_COLORSPACE_FROM_cHRM            0x0010
-#define PNG_COLORSPACE_FROM_sRGB            0x0020
-#define PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB 0x0040
-#define PNG_COLORSPACE_MATCHES_sRGB         0x0080 /* exact match on profile */
-#define PNG_COLORSPACE_INVALID              0x8000
-#define PNG_COLORSPACE_CANCEL(flags)        (0xffff ^ (flags))
-#endif /* COLORSPACE || GAMMA */
-
-struct png_struct_def
-{
-#ifdef PNG_SETJMP_SUPPORTED
-   jmp_buf jmp_buf_local;     /* New name in 1.6.0 for jmp_buf in png_struct */
-   png_longjmp_ptr longjmp_fn;/* setjmp non-local goto function. */
-   jmp_buf *jmp_buf_ptr;      /* passed to longjmp_fn */
-   size_t jmp_buf_size;       /* size of the above, if allocated */
-#endif
-   png_error_ptr error_fn;    /* function for printing errors and aborting */
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_error_ptr warning_fn;  /* function for printing warnings */
-#endif
-   png_voidp error_ptr;       /* user supplied struct for error functions */
-   png_rw_ptr write_data_fn;  /* function for writing output data */
-   png_rw_ptr read_data_fn;   /* function for reading input data */
-   png_voidp io_ptr;          /* ptr to application struct for I/O functions */
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   png_user_transform_ptr read_user_transform_fn; /* user read transform */
-#endif
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   png_user_transform_ptr write_user_transform_fn; /* user write transform */
-#endif
-
-/* These were added in libpng-1.0.2 */
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-   png_voidp user_transform_ptr; /* user supplied struct for user transform */
-   png_byte user_transform_depth;    /* bit depth of user transformed pixels */
-   png_byte user_transform_channels; /* channels in user transformed pixels */
-#endif
-#endif
-
-   png_uint_32 mode;          /* tells us where we are in the PNG file */
-   png_uint_32 flags;         /* flags indicating various things to libpng */
-   png_uint_32 transformations; /* which transformations to perform */
-
-   png_uint_32 zowner;        /* ID (chunk type) of zstream owner, 0 if none */
-   z_stream    zstream;       /* decompression structure */
-
-#ifdef PNG_WRITE_SUPPORTED
-   png_compression_bufferp zbuffer_list; /* Created on demand during write */
-   uInt                    zbuffer_size; /* size of the actual buffer */
-
-   int zlib_level;            /* holds zlib compression level */
-   int zlib_method;           /* holds zlib compression method */
-   int zlib_window_bits;      /* holds zlib compression window bits */
-   int zlib_mem_level;        /* holds zlib compression memory level */
-   int zlib_strategy;         /* holds zlib compression strategy */
-#endif
-/* Added at libpng 1.5.4 */
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-   int zlib_text_level;            /* holds zlib compression level */
-   int zlib_text_method;           /* holds zlib compression method */
-   int zlib_text_window_bits;      /* holds zlib compression window bits */
-   int zlib_text_mem_level;        /* holds zlib compression memory level */
-   int zlib_text_strategy;         /* holds zlib compression strategy */
-#endif
-/* End of material added at libpng 1.5.4 */
-/* Added at libpng 1.6.0 */
-#ifdef PNG_WRITE_SUPPORTED
-   int zlib_set_level;        /* Actual values set into the zstream on write */
-   int zlib_set_method;
-   int zlib_set_window_bits;
-   int zlib_set_mem_level;
-   int zlib_set_strategy;
-#endif
-
-   png_uint_32 width;         /* width of image in pixels */
-   png_uint_32 height;        /* height of image in pixels */
-   png_uint_32 num_rows;      /* number of rows in current pass */
-   png_uint_32 usr_width;     /* width of row at start of write */
-   size_t rowbytes;           /* size of row in bytes */
-   png_uint_32 iwidth;        /* width of current interlaced row in pixels */
-   png_uint_32 row_number;    /* current row in interlace pass */
-   png_uint_32 chunk_name;    /* PNG_CHUNK() id of current chunk */
-   png_bytep prev_row;        /* buffer to save previous (unfiltered) row.
-                               * While reading this is a pointer into
-                               * big_prev_row; while writing it is separately
-                               * allocated if needed.
-                               */
-   png_bytep row_buf;         /* buffer to save current (unfiltered) row.
-                               * While reading, this is a pointer into
-                               * big_row_buf; while writing it is separately
-                               * allocated.
-                               */
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   png_bytep try_row;    /* buffer to save trial row when filtering */
-   png_bytep tst_row;    /* buffer to save best trial row when filtering */
-#endif
-   size_t info_rowbytes;      /* Added in 1.5.4: cache of updated row bytes */
-
-   png_uint_32 idat_size;     /* current IDAT size for read */
-   png_uint_32 crc;           /* current chunk CRC value */
-   png_colorp palette;        /* palette from the input file */
-   png_uint_16 num_palette;   /* number of color entries in palette */
-
-/* Added at libpng-1.5.10 */
-#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   int num_palette_max;       /* maximum palette index found in IDAT */
-#endif
-
-   png_uint_16 num_trans;     /* number of transparency values */
-   png_byte compression;      /* file compression type (always 0) */
-   png_byte filter;           /* file filter type (always 0) */
-   png_byte interlaced;       /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
-   png_byte pass;             /* current interlace pass (0 - 6) */
-   png_byte do_filter;        /* row filter flags (see PNG_FILTER_ in png.h ) */
-   png_byte color_type;       /* color type of file */
-   png_byte bit_depth;        /* bit depth of file */
-   png_byte usr_bit_depth;    /* bit depth of users row: write only */
-   png_byte pixel_depth;      /* number of bits per pixel */
-   png_byte channels;         /* number of channels in file */
-#ifdef PNG_WRITE_SUPPORTED
-   png_byte usr_channels;     /* channels at start of write: write only */
-#endif
-   png_byte sig_bytes;        /* magic bytes read/written from start of file */
-   png_byte maximum_pixel_depth;
-                              /* pixel depth used for the row buffers */
-   png_byte transformed_pixel_depth;
-                              /* pixel depth after read/write transforms */
-#if ZLIB_VERNUM >= 0x1240
-   png_byte zstream_start;    /* at start of an input zlib stream */
-#endif /* Zlib >= 1.2.4 */
-#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
-   png_uint_16 filler;           /* filler bytes for pixel expansion */
-#endif
-
-#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   png_byte background_gamma_type;
-   png_fixed_point background_gamma;
-   png_color_16 background;   /* background color in screen gamma space */
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   png_color_16 background_1; /* background normalized to gamma 1.0 */
-#endif
-#endif /* bKGD */
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   png_flush_ptr output_flush_fn; /* Function for flushing output */
-   png_uint_32 flush_dist;    /* how many rows apart to flush, 0 - no flush */
-   png_uint_32 flush_rows;    /* number of rows written since last flush */
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   int gamma_shift;      /* number of "insignificant" bits in 16-bit gamma */
-   png_fixed_point screen_gamma; /* screen gamma value (display_exponent) */
-
-   png_bytep gamma_table;     /* gamma table for 8-bit depth files */
-   png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-   png_bytep gamma_from_1;    /* converts from 1.0 to screen */
-   png_bytep gamma_to_1;      /* converts from file to 1.0 */
-   png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */
-   png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
-   png_color_8 sig_bit;       /* significant bits in each available channel */
-#endif
-
-#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
-   png_color_8 shift;         /* shift for significant bit transformation */
-#endif
-
-#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
- || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   png_bytep trans_alpha;           /* alpha values for paletted files */
-   png_color_16 trans_color;  /* transparent color for non-paletted files */
-#endif
-
-   png_read_status_ptr read_row_fn;   /* called after each row is decoded */
-   png_write_status_ptr write_row_fn; /* called after each row is encoded */
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   png_progressive_info_ptr info_fn; /* called after header data fully read */
-   png_progressive_row_ptr row_fn;   /* called after a prog. row is decoded */
-   png_progressive_end_ptr end_fn;   /* called after image is complete */
-   png_bytep save_buffer_ptr;        /* current location in save_buffer */
-   png_bytep save_buffer;            /* buffer for previously read data */
-   png_bytep current_buffer_ptr;     /* current location in current_buffer */
-   png_bytep current_buffer;         /* buffer for recently used data */
-   png_uint_32 push_length;          /* size of current input chunk */
-   png_uint_32 skip_length;          /* bytes to skip in input data */
-   size_t save_buffer_size;          /* amount of data now in save_buffer */
-   size_t save_buffer_max;           /* total size of save_buffer */
-   size_t buffer_size;               /* total amount of available input data */
-   size_t current_buffer_size;       /* amount of data now in current_buffer */
-   int process_mode;                 /* what push library is currently doing */
-   int cur_palette;                  /* current push library palette index */
-#endif /* PROGRESSIVE_READ */
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   png_bytep palette_lookup; /* lookup table for quantizing */
-   png_bytep quantize_index; /* index translation for palette files */
-#endif
-
-/* Options */
-#ifdef PNG_SET_OPTION_SUPPORTED
-   png_uint_32 options;           /* On/off state (up to 16 options) */
-#endif
-
-#if PNG_LIBPNG_VER < 10700
-/* To do: remove this from libpng-1.7 */
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-   char time_buffer[29]; /* String to hold RFC 1123 time text */
-#endif
-#endif
-
-/* New members added in libpng-1.0.6 */
-
-   png_uint_32 free_me;    /* flags items libpng is responsible for freeing */
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-   png_voidp user_chunk_ptr;
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-   png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */
-#endif
-#endif
-
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-   int          unknown_default; /* As PNG_HANDLE_* */
-   unsigned int num_chunk_list;  /* Number of entries in the list */
-   png_bytep    chunk_list;      /* List of png_byte[5]; the textual chunk name
-                                  * followed by a PNG_HANDLE_* byte */
-#endif
-
-/* New members added in libpng-1.0.3 */
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   png_byte rgb_to_gray_status;
-   /* Added in libpng 1.5.5 to record setting of coefficients: */
-   png_byte rgb_to_gray_coefficients_set;
-   /* These were changed from png_byte in libpng-1.0.6 */
-   png_uint_16 rgb_to_gray_red_coeff;
-   png_uint_16 rgb_to_gray_green_coeff;
-   /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */
-#endif
-
-/* New member added in libpng-1.6.36 */
-#if defined(PNG_READ_EXPAND_SUPPORTED) && \
-    defined(PNG_ARM_NEON_IMPLEMENTATION)
-   png_bytep riffled_palette; /* buffer for accelerated palette expansion */
-#endif
-
-/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
-#if defined(PNG_MNG_FEATURES_SUPPORTED)
-/* Changed from png_byte to png_uint_32 at version 1.2.0 */
-   png_uint_32 mng_features_permitted;
-#endif
-
-/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_byte filter_type;
-#endif
-
-/* New members added in libpng-1.2.0 */
-
-/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_voidp mem_ptr;             /* user supplied struct for mem functions */
-   png_malloc_ptr malloc_fn;      /* function for allocating memory */
-   png_free_ptr free_fn;          /* function for freeing memory */
-#endif
-
-/* New member added in libpng-1.0.13 and 1.2.0 */
-   png_bytep big_row_buf;         /* buffer to save current (unfiltered) row */
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-/* The following three members were added at version 1.0.14 and 1.2.4 */
-   png_bytep quantize_sort;          /* working sort array */
-   png_bytep index_to_palette;       /* where the original index currently is
-                                        in the palette */
-   png_bytep palette_to_index;       /* which original index points to this
-                                         palette color */
-#endif
-
-/* New members added in libpng-1.0.16 and 1.2.6 */
-   png_byte compression_type;
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   png_uint_32 user_width_max;
-   png_uint_32 user_height_max;
-
-   /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown
-    * chunks that can be stored (0 means unlimited).
-    */
-   png_uint_32 user_chunk_cache_max;
-
-   /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk
-    * can occupy when decompressed.  0 means unlimited.
-    */
-   png_alloc_size_t user_chunk_malloc_max;
-#endif
-
-/* New member added in libpng-1.0.25 and 1.2.17 */
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-   /* Temporary storage for unknown chunk that the library doesn't recognize,
-    * used while reading the chunk.
-    */
-   png_unknown_chunk unknown_chunk;
-#endif
-
-/* New member added in libpng-1.2.26 */
-   size_t old_big_row_buf_size;
-
-#ifdef PNG_READ_SUPPORTED
-/* New member added in libpng-1.2.30 */
-  png_bytep        read_buffer;      /* buffer for reading chunk data */
-  png_alloc_size_t read_buffer_size; /* current size of the buffer */
-#endif
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-  uInt             IDAT_read_size;   /* limit on read buffer size for IDAT */
-#endif
-
-#ifdef PNG_IO_STATE_SUPPORTED
-/* New member added in libpng-1.4.0 */
-   png_uint_32 io_state;
-#endif
-
-/* New member added in libpng-1.5.6 */
-   png_bytep big_prev_row;
-
-/* New member added in libpng-1.5.7 */
-   void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
-      png_bytep row, png_const_bytep prev_row);
-
-#ifdef PNG_READ_SUPPORTED
-#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
-   png_colorspace   colorspace;
-#endif
-#endif
-};
-#endif /* PNGSTRUCT_H */
diff --git a/dep/libpng/src/pngtrans.c b/dep/libpng/src/pngtrans.c
deleted file mode 100644
index 62cb21edf..000000000
--- a/dep/libpng/src/pngtrans.c
+++ /dev/null
@@ -1,868 +0,0 @@
-
-/* pngtrans.c - transforms the data in a row (used by both readers and writers)
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-/* Turn on BGR-to-RGB mapping */
-void PNGAPI
-png_set_bgr(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_bgr");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_BGR;
-}
-#endif
-
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-/* Turn on 16-bit byte swapping */
-void PNGAPI
-png_set_swap(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_swap");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (png_ptr->bit_depth == 16)
-      png_ptr->transformations |= PNG_SWAP_BYTES;
-}
-#endif
-
-#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
-/* Turn on pixel packing */
-void PNGAPI
-png_set_packing(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_packing");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (png_ptr->bit_depth < 8)
-   {
-      png_ptr->transformations |= PNG_PACK;
-#     ifdef PNG_WRITE_SUPPORTED
-         png_ptr->usr_bit_depth = 8;
-#     endif
-   }
-}
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-/* Turn on packed pixel swapping */
-void PNGAPI
-png_set_packswap(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_packswap");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (png_ptr->bit_depth < 8)
-      png_ptr->transformations |= PNG_PACKSWAP;
-}
-#endif
-
-#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
-void PNGAPI
-png_set_shift(png_structrp png_ptr, png_const_color_8p true_bits)
-{
-   png_debug(1, "in png_set_shift");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_SHIFT;
-   png_ptr->shift = *true_bits;
-}
-#endif
-
-#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
-    defined(PNG_WRITE_INTERLACING_SUPPORTED)
-int PNGAPI
-png_set_interlace_handling(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_interlace handling");
-
-   if (png_ptr != 0 && png_ptr->interlaced != 0)
-   {
-      png_ptr->transformations |= PNG_INTERLACE;
-      return 7;
-   }
-
-   return 1;
-}
-#endif
-
-#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
-/* Add a filler byte on read, or remove a filler or alpha byte on write.
- * The filler type has changed in v0.95 to allow future 2-byte fillers
- * for 48-bit input data, as well as to avoid problems with some compilers
- * that don't like bytes as parameters.
- */
-void PNGAPI
-png_set_filler(png_structrp png_ptr, png_uint_32 filler, int filler_loc)
-{
-   png_debug(1, "in png_set_filler");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* In libpng 1.6 it is possible to determine whether this is a read or write
-    * operation and therefore to do more checking here for a valid call.
-    */
-   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
-   {
-#     ifdef PNG_READ_FILLER_SUPPORTED
-         /* On read png_set_filler is always valid, regardless of the base PNG
-          * format, because other transformations can give a format where the
-          * filler code can execute (basically an 8 or 16-bit component RGB or G
-          * format.)
-          *
-          * NOTE: usr_channels is not used by the read code!  (This has led to
-          * confusion in the past.)  The filler is only used in the read code.
-          */
-         png_ptr->filler = (png_uint_16)filler;
-#     else
-         png_app_error(png_ptr, "png_set_filler not supported on read");
-         PNG_UNUSED(filler) /* not used in the write case */
-         return;
-#     endif
-   }
-
-   else /* write */
-   {
-#     ifdef PNG_WRITE_FILLER_SUPPORTED
-         /* On write the usr_channels parameter must be set correctly at the
-          * start to record the number of channels in the app-supplied data.
-          */
-         switch (png_ptr->color_type)
-         {
-            case PNG_COLOR_TYPE_RGB:
-               png_ptr->usr_channels = 4;
-               break;
-
-            case PNG_COLOR_TYPE_GRAY:
-               if (png_ptr->bit_depth >= 8)
-               {
-                  png_ptr->usr_channels = 2;
-                  break;
-               }
-
-               else
-               {
-                  /* There simply isn't any code in libpng to strip out bits
-                   * from bytes when the components are less than a byte in
-                   * size!
-                   */
-                  png_app_error(png_ptr,
-                      "png_set_filler is invalid for"
-                      " low bit depth gray output");
-                  return;
-               }
-
-            default:
-               png_app_error(png_ptr,
-                   "png_set_filler: inappropriate color type");
-               return;
-         }
-#     else
-         png_app_error(png_ptr, "png_set_filler not supported on write");
-         return;
-#     endif
-   }
-
-   /* Here on success - libpng supports the operation, set the transformation
-    * and the flag to say where the filler channel is.
-    */
-   png_ptr->transformations |= PNG_FILLER;
-
-   if (filler_loc == PNG_FILLER_AFTER)
-      png_ptr->flags |= PNG_FLAG_FILLER_AFTER;
-
-   else
-      png_ptr->flags &= ~PNG_FLAG_FILLER_AFTER;
-}
-
-/* Added to libpng-1.2.7 */
-void PNGAPI
-png_set_add_alpha(png_structrp png_ptr, png_uint_32 filler, int filler_loc)
-{
-   png_debug(1, "in png_set_add_alpha");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_set_filler(png_ptr, filler, filler_loc);
-   /* The above may fail to do anything. */
-   if ((png_ptr->transformations & PNG_FILLER) != 0)
-      png_ptr->transformations |= PNG_ADD_ALPHA;
-}
-
-#endif
-
-#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
-void PNGAPI
-png_set_swap_alpha(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_swap_alpha");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_SWAP_ALPHA;
-}
-#endif
-
-#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
-void PNGAPI
-png_set_invert_alpha(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_invert_alpha");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_INVERT_ALPHA;
-}
-#endif
-
-#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
-void PNGAPI
-png_set_invert_mono(png_structrp png_ptr)
-{
-   png_debug(1, "in png_set_invert_mono");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_INVERT_MONO;
-}
-
-/* Invert monochrome grayscale data */
-void /* PRIVATE */
-png_do_invert(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_invert");
-
-  /* This test removed from libpng version 1.0.13 and 1.2.0:
-   *   if (row_info->bit_depth == 1 &&
-   */
-   if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      png_bytep rp = row;
-      size_t i;
-      size_t istop = row_info->rowbytes;
-
-      for (i = 0; i < istop; i++)
-      {
-         *rp = (png_byte)(~(*rp));
-         rp++;
-      }
-   }
-
-   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
-      row_info->bit_depth == 8)
-   {
-      png_bytep rp = row;
-      size_t i;
-      size_t istop = row_info->rowbytes;
-
-      for (i = 0; i < istop; i += 2)
-      {
-         *rp = (png_byte)(~(*rp));
-         rp += 2;
-      }
-   }
-
-#ifdef PNG_16BIT_SUPPORTED
-   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
-      row_info->bit_depth == 16)
-   {
-      png_bytep rp = row;
-      size_t i;
-      size_t istop = row_info->rowbytes;
-
-      for (i = 0; i < istop; i += 4)
-      {
-         *rp = (png_byte)(~(*rp));
-         *(rp + 1) = (png_byte)(~(*(rp + 1)));
-         rp += 4;
-      }
-   }
-#endif
-}
-#endif
-
-#ifdef PNG_16BIT_SUPPORTED
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-/* Swaps byte order on 16-bit depth images */
-void /* PRIVATE */
-png_do_swap(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_swap");
-
-   if (row_info->bit_depth == 16)
-   {
-      png_bytep rp = row;
-      png_uint_32 i;
-      png_uint_32 istop= row_info->width * row_info->channels;
-
-      for (i = 0; i < istop; i++, rp += 2)
-      {
-#ifdef PNG_BUILTIN_BSWAP16_SUPPORTED
-         /* Feature added to libpng-1.6.11 for testing purposes, not
-          * enabled by default.
-          */
-         *(png_uint_16*)rp = __builtin_bswap16(*(png_uint_16*)rp);
-#else
-         png_byte t = *rp;
-         *rp = *(rp + 1);
-         *(rp + 1) = t;
-#endif
-      }
-   }
-}
-#endif
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-static const png_byte onebppswaptable[256] = {
-   0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
-   0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
-   0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
-   0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
-   0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
-   0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
-   0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
-   0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
-   0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
-   0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
-   0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
-   0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
-   0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
-   0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
-   0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
-   0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
-   0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
-   0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
-   0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
-   0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
-   0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
-   0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
-   0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
-   0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
-   0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
-   0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
-   0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
-   0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
-   0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
-   0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
-   0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
-   0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
-};
-
-static const png_byte twobppswaptable[256] = {
-   0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0,
-   0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
-   0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4,
-   0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
-   0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8,
-   0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
-   0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC,
-   0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
-   0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1,
-   0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
-   0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5,
-   0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
-   0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9,
-   0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
-   0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD,
-   0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
-   0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2,
-   0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
-   0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6,
-   0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
-   0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA,
-   0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
-   0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE,
-   0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
-   0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3,
-   0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
-   0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7,
-   0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
-   0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB,
-   0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
-   0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF,
-   0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
-};
-
-static const png_byte fourbppswaptable[256] = {
-   0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
-   0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0,
-   0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
-   0x81, 0x91, 0xA1, 0xB1, 0xC1, 0xD1, 0xE1, 0xF1,
-   0x02, 0x12, 0x22, 0x32, 0x42, 0x52, 0x62, 0x72,
-   0x82, 0x92, 0xA2, 0xB2, 0xC2, 0xD2, 0xE2, 0xF2,
-   0x03, 0x13, 0x23, 0x33, 0x43, 0x53, 0x63, 0x73,
-   0x83, 0x93, 0xA3, 0xB3, 0xC3, 0xD3, 0xE3, 0xF3,
-   0x04, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74,
-   0x84, 0x94, 0xA4, 0xB4, 0xC4, 0xD4, 0xE4, 0xF4,
-   0x05, 0x15, 0x25, 0x35, 0x45, 0x55, 0x65, 0x75,
-   0x85, 0x95, 0xA5, 0xB5, 0xC5, 0xD5, 0xE5, 0xF5,
-   0x06, 0x16, 0x26, 0x36, 0x46, 0x56, 0x66, 0x76,
-   0x86, 0x96, 0xA6, 0xB6, 0xC6, 0xD6, 0xE6, 0xF6,
-   0x07, 0x17, 0x27, 0x37, 0x47, 0x57, 0x67, 0x77,
-   0x87, 0x97, 0xA7, 0xB7, 0xC7, 0xD7, 0xE7, 0xF7,
-   0x08, 0x18, 0x28, 0x38, 0x48, 0x58, 0x68, 0x78,
-   0x88, 0x98, 0xA8, 0xB8, 0xC8, 0xD8, 0xE8, 0xF8,
-   0x09, 0x19, 0x29, 0x39, 0x49, 0x59, 0x69, 0x79,
-   0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9, 0xE9, 0xF9,
-   0x0A, 0x1A, 0x2A, 0x3A, 0x4A, 0x5A, 0x6A, 0x7A,
-   0x8A, 0x9A, 0xAA, 0xBA, 0xCA, 0xDA, 0xEA, 0xFA,
-   0x0B, 0x1B, 0x2B, 0x3B, 0x4B, 0x5B, 0x6B, 0x7B,
-   0x8B, 0x9B, 0xAB, 0xBB, 0xCB, 0xDB, 0xEB, 0xFB,
-   0x0C, 0x1C, 0x2C, 0x3C, 0x4C, 0x5C, 0x6C, 0x7C,
-   0x8C, 0x9C, 0xAC, 0xBC, 0xCC, 0xDC, 0xEC, 0xFC,
-   0x0D, 0x1D, 0x2D, 0x3D, 0x4D, 0x5D, 0x6D, 0x7D,
-   0x8D, 0x9D, 0xAD, 0xBD, 0xCD, 0xDD, 0xED, 0xFD,
-   0x0E, 0x1E, 0x2E, 0x3E, 0x4E, 0x5E, 0x6E, 0x7E,
-   0x8E, 0x9E, 0xAE, 0xBE, 0xCE, 0xDE, 0xEE, 0xFE,
-   0x0F, 0x1F, 0x2F, 0x3F, 0x4F, 0x5F, 0x6F, 0x7F,
-   0x8F, 0x9F, 0xAF, 0xBF, 0xCF, 0xDF, 0xEF, 0xFF
-};
-
-/* Swaps pixel packing order within bytes */
-void /* PRIVATE */
-png_do_packswap(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_packswap");
-
-   if (row_info->bit_depth < 8)
-   {
-      png_bytep rp;
-      png_const_bytep end, table;
-
-      end = row + row_info->rowbytes;
-
-      if (row_info->bit_depth == 1)
-         table = onebppswaptable;
-
-      else if (row_info->bit_depth == 2)
-         table = twobppswaptable;
-
-      else if (row_info->bit_depth == 4)
-         table = fourbppswaptable;
-
-      else
-         return;
-
-      for (rp = row; rp < end; rp++)
-         *rp = table[*rp];
-   }
-}
-#endif /* PACKSWAP || WRITE_PACKSWAP */
-
-#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
-    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
-/* Remove a channel - this used to be 'png_do_strip_filler' but it used a
- * somewhat weird combination of flags to determine what to do.  All the calls
- * to png_do_strip_filler are changed in 1.5.2 to call this instead with the
- * correct arguments.
- *
- * The routine isn't general - the channel must be the channel at the start or
- * end (not in the middle) of each pixel.
- */
-void /* PRIVATE */
-png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start)
-{
-   png_bytep sp = row; /* source pointer */
-   png_bytep dp = row; /* destination pointer */
-   png_bytep ep = row + row_info->rowbytes; /* One beyond end of row */
-
-   png_debug(1, "in png_do_strip_channel");
-
-   /* At the start sp will point to the first byte to copy and dp to where
-    * it is copied to.  ep always points just beyond the end of the row, so
-    * the loop simply copies (channels-1) channels until sp reaches ep.
-    *
-    * at_start:        0 -- convert AG, XG, ARGB, XRGB, AAGG, XXGG, etc.
-    *            nonzero -- convert GA, GX, RGBA, RGBX, GGAA, RRGGBBXX, etc.
-    */
-
-   /* GA, GX, XG cases */
-   if (row_info->channels == 2)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         if (at_start != 0) /* Skip initial filler */
-            ++sp;
-         else          /* Skip initial channel and, for sp, the filler */
-         {
-            sp += 2; ++dp;
-         }
-
-         /* For a 1 pixel wide image there is nothing to do */
-         while (sp < ep)
-         {
-            *dp++ = *sp; sp += 2;
-         }
-
-         row_info->pixel_depth = 8;
-      }
-
-      else if (row_info->bit_depth == 16)
-      {
-         if (at_start != 0) /* Skip initial filler */
-            sp += 2;
-         else          /* Skip initial channel and, for sp, the filler */
-         {
-            sp += 4; dp += 2;
-         }
-
-         while (sp < ep)
-         {
-            *dp++ = *sp++; *dp++ = *sp; sp += 3;
-         }
-
-         row_info->pixel_depth = 16;
-      }
-
-      else
-         return; /* bad bit depth */
-
-      row_info->channels = 1;
-
-      /* Finally fix the color type if it records an alpha channel */
-      if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-         row_info->color_type = PNG_COLOR_TYPE_GRAY;
-   }
-
-   /* RGBA, RGBX, XRGB cases */
-   else if (row_info->channels == 4)
-   {
-      if (row_info->bit_depth == 8)
-      {
-         if (at_start != 0) /* Skip initial filler */
-            ++sp;
-         else          /* Skip initial channels and, for sp, the filler */
-         {
-            sp += 4; dp += 3;
-         }
-
-         /* Note that the loop adds 3 to dp and 4 to sp each time. */
-         while (sp < ep)
-         {
-            *dp++ = *sp++; *dp++ = *sp++; *dp++ = *sp; sp += 2;
-         }
-
-         row_info->pixel_depth = 24;
-      }
-
-      else if (row_info->bit_depth == 16)
-      {
-         if (at_start != 0) /* Skip initial filler */
-            sp += 2;
-         else          /* Skip initial channels and, for sp, the filler */
-         {
-            sp += 8; dp += 6;
-         }
-
-         while (sp < ep)
-         {
-            /* Copy 6 bytes, skip 2 */
-            *dp++ = *sp++; *dp++ = *sp++;
-            *dp++ = *sp++; *dp++ = *sp++;
-            *dp++ = *sp++; *dp++ = *sp; sp += 3;
-         }
-
-         row_info->pixel_depth = 48;
-      }
-
-      else
-         return; /* bad bit depth */
-
-      row_info->channels = 3;
-
-      /* Finally fix the color type if it records an alpha channel */
-      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-         row_info->color_type = PNG_COLOR_TYPE_RGB;
-   }
-
-   else
-      return; /* The filler channel has gone already */
-
-   /* Fix the rowbytes value. */
-   row_info->rowbytes = (size_t)(dp-row);
-}
-#endif
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-/* Swaps red and blue bytes within a pixel */
-void /* PRIVATE */
-png_do_bgr(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_bgr");
-
-   if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      png_uint_32 row_width = row_info->width;
-      if (row_info->bit_depth == 8)
-      {
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-         {
-            png_bytep rp;
-            png_uint_32 i;
-
-            for (i = 0, rp = row; i < row_width; i++, rp += 3)
-            {
-               png_byte save = *rp;
-               *rp = *(rp + 2);
-               *(rp + 2) = save;
-            }
-         }
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-         {
-            png_bytep rp;
-            png_uint_32 i;
-
-            for (i = 0, rp = row; i < row_width; i++, rp += 4)
-            {
-               png_byte save = *rp;
-               *rp = *(rp + 2);
-               *(rp + 2) = save;
-            }
-         }
-      }
-
-#ifdef PNG_16BIT_SUPPORTED
-      else if (row_info->bit_depth == 16)
-      {
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-         {
-            png_bytep rp;
-            png_uint_32 i;
-
-            for (i = 0, rp = row; i < row_width; i++, rp += 6)
-            {
-               png_byte save = *rp;
-               *rp = *(rp + 4);
-               *(rp + 4) = save;
-               save = *(rp + 1);
-               *(rp + 1) = *(rp + 5);
-               *(rp + 5) = save;
-            }
-         }
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-         {
-            png_bytep rp;
-            png_uint_32 i;
-
-            for (i = 0, rp = row; i < row_width; i++, rp += 8)
-            {
-               png_byte save = *rp;
-               *rp = *(rp + 4);
-               *(rp + 4) = save;
-               save = *(rp + 1);
-               *(rp + 1) = *(rp + 5);
-               *(rp + 5) = save;
-            }
-         }
-      }
-#endif
-   }
-}
-#endif /* READ_BGR || WRITE_BGR */
-
-#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
-    defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
-/* Added at libpng-1.5.10 */
-void /* PRIVATE */
-png_do_check_palette_indexes(png_structrp png_ptr, png_row_infop row_info)
-{
-   png_debug(1, "in png_do_check_palette_indexes");
-
-   if (png_ptr->num_palette < (1 << row_info->bit_depth) &&
-      png_ptr->num_palette > 0) /* num_palette can be 0 in MNG files */
-   {
-      /* Calculations moved outside switch in an attempt to stop different
-       * compiler warnings.  'padding' is in *bits* within the last byte, it is
-       * an 'int' because pixel_depth becomes an 'int' in the expression below,
-       * and this calculation is used because it avoids warnings that other
-       * forms produced on either GCC or MSVC.
-       */
-      int padding = PNG_PADBITS(row_info->pixel_depth, row_info->width);
-      png_bytep rp = png_ptr->row_buf + row_info->rowbytes;
-
-      switch (row_info->bit_depth)
-      {
-         case 1:
-         {
-            /* in this case, all bytes must be 0 so we don't need
-             * to unpack the pixels except for the rightmost one.
-             */
-            for (; rp > png_ptr->row_buf; rp--)
-            {
-              if ((*rp >> padding) != 0)
-                 png_ptr->num_palette_max = 1;
-              padding = 0;
-            }
-
-            break;
-         }
-
-         case 2:
-         {
-            for (; rp > png_ptr->row_buf; rp--)
-            {
-              int i = ((*rp >> padding) & 0x03);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              i = (((*rp >> padding) >> 2) & 0x03);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              i = (((*rp >> padding) >> 4) & 0x03);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              i = (((*rp >> padding) >> 6) & 0x03);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              padding = 0;
-            }
-
-            break;
-         }
-
-         case 4:
-         {
-            for (; rp > png_ptr->row_buf; rp--)
-            {
-              int i = ((*rp >> padding) & 0x0f);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              i = (((*rp >> padding) >> 4) & 0x0f);
-
-              if (i > png_ptr->num_palette_max)
-                 png_ptr->num_palette_max = i;
-
-              padding = 0;
-            }
-
-            break;
-         }
-
-         case 8:
-         {
-            for (; rp > png_ptr->row_buf; rp--)
-            {
-               if (*rp > png_ptr->num_palette_max)
-                  png_ptr->num_palette_max = (int) *rp;
-            }
-
-            break;
-         }
-
-         default:
-            break;
-      }
-   }
-}
-#endif /* CHECK_FOR_INVALID_INDEX */
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-void PNGAPI
-png_set_user_transform_info(png_structrp png_ptr, png_voidp
-   user_transform_ptr, int user_transform_depth, int user_transform_channels)
-{
-   png_debug(1, "in png_set_user_transform_info");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
-      (png_ptr->flags & PNG_FLAG_ROW_INIT) != 0)
-   {
-      png_app_error(png_ptr,
-          "info change after png_start_read_image or png_read_update_info");
-      return;
-   }
-#endif
-
-   png_ptr->user_transform_ptr = user_transform_ptr;
-   png_ptr->user_transform_depth = (png_byte)user_transform_depth;
-   png_ptr->user_transform_channels = (png_byte)user_transform_channels;
-}
-#endif
-
-/* This function returns a pointer to the user_transform_ptr associated with
- * the user transform functions.  The application should free any memory
- * associated with this pointer before png_write_destroy and png_read_destroy
- * are called.
- */
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-png_voidp PNGAPI
-png_get_user_transform_ptr(png_const_structrp png_ptr)
-{
-   if (png_ptr == NULL)
-      return NULL;
-
-   return png_ptr->user_transform_ptr;
-}
-#endif
-
-#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED
-png_uint_32 PNGAPI
-png_get_current_row_number(png_const_structrp png_ptr)
-{
-   /* See the comments in png.h - this is the sub-image row when reading an
-    * interlaced image.
-    */
-   if (png_ptr != NULL)
-      return png_ptr->row_number;
-
-   return PNG_UINT_32_MAX; /* help the app not to fail silently */
-}
-
-png_byte PNGAPI
-png_get_current_pass_number(png_const_structrp png_ptr)
-{
-   if (png_ptr != NULL)
-      return png_ptr->pass;
-   return 8; /* invalid */
-}
-#endif /* USER_TRANSFORM_INFO */
-#endif /* READ_USER_TRANSFORM || WRITE_USER_TRANSFORM */
-#endif /* READ || WRITE */
diff --git a/dep/libpng/src/pngwio.c b/dep/libpng/src/pngwio.c
deleted file mode 100644
index 10e919dd0..000000000
--- a/dep/libpng/src/pngwio.c
+++ /dev/null
@@ -1,168 +0,0 @@
-
-/* pngwio.c - functions for data output
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2014,2016,2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file provides a location for all output.  Users who need
- * special handling are expected to write functions that have the same
- * arguments as these and perform similar functions, but that possibly
- * use different output methods.  Note that you shouldn't change these
- * functions, but rather write replacement functions and then change
- * them at run time with png_set_write_fn(...).
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_WRITE_SUPPORTED
-
-/* Write the data to whatever output you are using.  The default routine
- * writes to a file pointer.  Note that this routine sometimes gets called
- * with very small lengths, so you should implement some kind of simple
- * buffering if you are using unbuffered writes.  This should never be asked
- * to write more than 64K on a 16-bit machine.
- */
-
-void /* PRIVATE */
-png_write_data(png_structrp png_ptr, png_const_bytep data, size_t length)
-{
-   /* NOTE: write_data_fn must not change the buffer! */
-   if (png_ptr->write_data_fn != NULL )
-      (*(png_ptr->write_data_fn))(png_ptr, png_constcast(png_bytep,data),
-          length);
-
-   else
-      png_error(png_ptr, "Call to NULL write function");
-}
-
-#ifdef PNG_STDIO_SUPPORTED
-/* This is the function that does the actual writing of data.  If you are
- * not writing to a standard C stream, you should create a replacement
- * write_data function and use it at run time with png_set_write_fn(), rather
- * than changing the library.
- */
-void PNGCBAPI
-png_default_write_data(png_structp png_ptr, png_bytep data, size_t length)
-{
-   size_t check;
-
-   if (png_ptr == NULL)
-      return;
-
-   check = fwrite(data, 1, length, (png_FILE_p)(png_ptr->io_ptr));
-
-   if (check != length)
-      png_error(png_ptr, "Write Error");
-}
-#endif
-
-/* This function is called to output any data pending writing (normally
- * to disk).  After png_flush is called, there should be no data pending
- * writing in any buffers.
- */
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-void /* PRIVATE */
-png_flush(png_structrp png_ptr)
-{
-   if (png_ptr->output_flush_fn != NULL)
-      (*(png_ptr->output_flush_fn))(png_ptr);
-}
-
-#  ifdef PNG_STDIO_SUPPORTED
-void PNGCBAPI
-png_default_flush(png_structp png_ptr)
-{
-   png_FILE_p io_ptr;
-
-   if (png_ptr == NULL)
-      return;
-
-   io_ptr = png_voidcast(png_FILE_p, (png_ptr->io_ptr));
-   fflush(io_ptr);
-}
-#  endif
-#endif
-
-/* This function allows the application to supply new output functions for
- * libpng if standard C streams aren't being used.
- *
- * This function takes as its arguments:
- * png_ptr       - pointer to a png output data structure
- * io_ptr        - pointer to user supplied structure containing info about
- *                 the output functions.  May be NULL.
- * write_data_fn - pointer to a new output function that takes as its
- *                 arguments a pointer to a png_struct, a pointer to
- *                 data to be written, and a 32-bit unsigned int that is
- *                 the number of bytes to be written.  The new write
- *                 function should call png_error(png_ptr, "Error msg")
- *                 to exit and output any fatal error messages.  May be
- *                 NULL, in which case libpng's default function will
- *                 be used.
- * flush_data_fn - pointer to a new flush function that takes as its
- *                 arguments a pointer to a png_struct.  After a call to
- *                 the flush function, there should be no data in any buffers
- *                 or pending transmission.  If the output method doesn't do
- *                 any buffering of output, a function prototype must still be
- *                 supplied although it doesn't have to do anything.  If
- *                 PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile
- *                 time, output_flush_fn will be ignored, although it must be
- *                 supplied for compatibility.  May be NULL, in which case
- *                 libpng's default function will be used, if
- *                 PNG_WRITE_FLUSH_SUPPORTED is defined.  This is not
- *                 a good idea if io_ptr does not point to a standard
- *                 *FILE structure.
- */
-void PNGAPI
-png_set_write_fn(png_structrp png_ptr, png_voidp io_ptr,
-    png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->io_ptr = io_ptr;
-
-#ifdef PNG_STDIO_SUPPORTED
-   if (write_data_fn != NULL)
-      png_ptr->write_data_fn = write_data_fn;
-
-   else
-      png_ptr->write_data_fn = png_default_write_data;
-#else
-   png_ptr->write_data_fn = write_data_fn;
-#endif
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-#  ifdef PNG_STDIO_SUPPORTED
-
-   if (output_flush_fn != NULL)
-      png_ptr->output_flush_fn = output_flush_fn;
-
-   else
-      png_ptr->output_flush_fn = png_default_flush;
-
-#  else
-   png_ptr->output_flush_fn = output_flush_fn;
-#  endif
-#else
-   PNG_UNUSED(output_flush_fn)
-#endif /* WRITE_FLUSH */
-
-#ifdef PNG_READ_SUPPORTED
-   /* It is an error to read while writing a png file */
-   if (png_ptr->read_data_fn != NULL)
-   {
-      png_ptr->read_data_fn = NULL;
-
-      png_warning(png_ptr,
-          "Can't set both read_data_fn and write_data_fn in the"
-          " same structure");
-   }
-#endif
-}
-#endif /* WRITE */
diff --git a/dep/libpng/src/pngwrite.c b/dep/libpng/src/pngwrite.c
deleted file mode 100644
index 77e412f43..000000000
--- a/dep/libpng/src/pngwrite.c
+++ /dev/null
@@ -1,2418 +0,0 @@
-
-/* pngwrite.c - general routines to write a PNG file
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
-#  include <errno.h>
-#endif /* SIMPLIFIED_WRITE_STDIO */
-
-#ifdef PNG_WRITE_SUPPORTED
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-/* Write out all the unknown chunks for the current given location */
-static void
-write_unknown_chunks(png_structrp png_ptr, png_const_inforp info_ptr,
-    unsigned int where)
-{
-   if (info_ptr->unknown_chunks_num != 0)
-   {
-      png_const_unknown_chunkp up;
-
-      png_debug(5, "writing extra chunks");
-
-      for (up = info_ptr->unknown_chunks;
-           up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
-           ++up)
-         if ((up->location & where) != 0)
-      {
-         /* If per-chunk unknown chunk handling is enabled use it, otherwise
-          * just write the chunks the application has set.
-          */
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-         int keep = png_handle_as_unknown(png_ptr, up->name);
-
-         /* NOTE: this code is radically different from the read side in the
-          * matter of handling an ancillary unknown chunk.  In the read side
-          * the default behavior is to discard it, in the code below the default
-          * behavior is to write it.  Critical chunks are, however, only
-          * written if explicitly listed or if the default is set to write all
-          * unknown chunks.
-          *
-          * The default handling is also slightly weird - it is not possible to
-          * stop the writing of all unsafe-to-copy chunks!
-          *
-          * TODO: REVIEW: this would seem to be a bug.
-          */
-         if (keep != PNG_HANDLE_CHUNK_NEVER &&
-             ((up->name[3] & 0x20) /* safe-to-copy overrides everything */ ||
-              keep == PNG_HANDLE_CHUNK_ALWAYS ||
-              (keep == PNG_HANDLE_CHUNK_AS_DEFAULT &&
-               png_ptr->unknown_default == PNG_HANDLE_CHUNK_ALWAYS)))
-#endif
-         {
-            /* TODO: review, what is wrong with a zero length unknown chunk? */
-            if (up->size == 0)
-               png_warning(png_ptr, "Writing zero-length unknown chunk");
-
-            png_write_chunk(png_ptr, up->name, up->data, up->size);
-         }
-      }
-   }
-}
-#endif /* WRITE_UNKNOWN_CHUNKS */
-
-/* Writes all the PNG information.  This is the suggested way to use the
- * library.  If you have a new chunk to add, make a function to write it,
- * and put it in the correct location here.  If you want the chunk written
- * after the image data, put it in png_write_end().  I strongly encourage
- * you to supply a PNG_INFO_<chunk> flag, and check info_ptr->valid before
- * writing the chunk, as that will keep the code from breaking if you want
- * to just write a plain PNG file.  If you have long comments, I suggest
- * writing them in png_write_end(), and compressing them.
- */
-void PNGAPI
-png_write_info_before_PLTE(png_structrp png_ptr, png_const_inforp info_ptr)
-{
-   png_debug(1, "in png_write_info_before_PLTE");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0)
-   {
-      /* Write PNG signature */
-      png_write_sig(png_ptr);
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-      if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 && \
-          png_ptr->mng_features_permitted != 0)
-      {
-         png_warning(png_ptr,
-             "MNG features are not allowed in a PNG datastream");
-         png_ptr->mng_features_permitted = 0;
-      }
-#endif
-
-      /* Write IHDR information. */
-      png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height,
-          info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type,
-          info_ptr->filter_type,
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-          info_ptr->interlace_type
-#else
-          0
-#endif
-         );
-
-      /* The rest of these check to see if the valid field has the appropriate
-       * flag set, and if it does, writes the chunk.
-       *
-       * 1.6.0: COLORSPACE support controls the writing of these chunks too, and
-       * the chunks will be written if the WRITE routine is there and
-       * information * is available in the COLORSPACE. (See
-       * png_colorspace_sync_info in png.c for where the valid flags get set.)
-       *
-       * Under certain circumstances the colorspace can be invalidated without
-       * syncing the info_struct 'valid' flags; this happens if libpng detects
-       * an error and calls png_error while the color space is being set, yet
-       * the application continues writing the PNG.  So check the 'invalid'
-       * flag here too.
-       */
-#ifdef PNG_GAMMA_SUPPORTED
-#  ifdef PNG_WRITE_gAMA_SUPPORTED
-      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
-          (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_gAMA) != 0 &&
-          (info_ptr->valid & PNG_INFO_gAMA) != 0)
-         png_write_gAMA_fixed(png_ptr, info_ptr->colorspace.gamma);
-#  endif
-#endif
-
-#ifdef PNG_COLORSPACE_SUPPORTED
-      /* Write only one of sRGB or an ICC profile.  If a profile was supplied
-       * and it matches one of the known sRGB ones issue a warning.
-       */
-#  ifdef PNG_WRITE_iCCP_SUPPORTED
-         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
-             (info_ptr->valid & PNG_INFO_iCCP) != 0)
-         {
-#    ifdef PNG_WRITE_sRGB_SUPPORTED
-               if ((info_ptr->valid & PNG_INFO_sRGB) != 0)
-                  png_app_warning(png_ptr,
-                      "profile matches sRGB but writing iCCP instead");
-#     endif
-
-            png_write_iCCP(png_ptr, info_ptr->iccp_name,
-                info_ptr->iccp_profile);
-         }
-#     ifdef PNG_WRITE_sRGB_SUPPORTED
-         else
-#     endif
-#  endif
-
-#  ifdef PNG_WRITE_sRGB_SUPPORTED
-         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
-             (info_ptr->valid & PNG_INFO_sRGB) != 0)
-            png_write_sRGB(png_ptr, info_ptr->colorspace.rendering_intent);
-#  endif /* WRITE_sRGB */
-#endif /* COLORSPACE */
-
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-         if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
-            png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
-#endif
-
-#ifdef PNG_COLORSPACE_SUPPORTED
-#  ifdef PNG_WRITE_cHRM_SUPPORTED
-         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
-             (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0 &&
-             (info_ptr->valid & PNG_INFO_cHRM) != 0)
-            png_write_cHRM_fixed(png_ptr, &info_ptr->colorspace.end_points_xy);
-#  endif
-#endif
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-         write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_IHDR);
-#endif
-
-      png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
-   }
-}
-
-void PNGAPI
-png_write_info(png_structrp png_ptr, png_const_inforp info_ptr)
-{
-#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
-   int i;
-#endif
-
-   png_debug(1, "in png_write_info");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_write_info_before_PLTE(png_ptr, info_ptr);
-
-   if ((info_ptr->valid & PNG_INFO_PLTE) != 0)
-      png_write_PLTE(png_ptr, info_ptr->palette,
-          (png_uint_32)info_ptr->num_palette);
-
-   else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      png_error(png_ptr, "Valid palette required for paletted images");
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_tRNS) !=0)
-   {
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-      /* Invert the alpha channel (in tRNS) */
-      if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0 &&
-          info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         int j, jend;
-
-         jend = info_ptr->num_trans;
-         if (jend > PNG_MAX_PALETTE_LENGTH)
-            jend = PNG_MAX_PALETTE_LENGTH;
-
-         for (j = 0; j<jend; ++j)
-            info_ptr->trans_alpha[j] =
-               (png_byte)(255 - info_ptr->trans_alpha[j]);
-      }
-#endif
-      png_write_tRNS(png_ptr, info_ptr->trans_alpha, &(info_ptr->trans_color),
-          info_ptr->num_trans, info_ptr->color_type);
-   }
-#endif
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_bKGD) != 0)
-      png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type);
-#endif
-
-#ifdef PNG_WRITE_eXIf_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_eXIf) != 0)
-   {
-      png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif);
-      png_ptr->mode |= PNG_WROTE_eXIf;
-   }
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_hIST) != 0)
-      png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette);
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
-      png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset,
-          info_ptr->offset_unit_type);
-#endif
-
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_pCAL) != 0)
-      png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0,
-          info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams,
-          info_ptr->pcal_units, info_ptr->pcal_params);
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_sCAL) != 0)
-      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
-          info_ptr->scal_s_width, info_ptr->scal_s_height);
-#endif /* sCAL */
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
-      png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit,
-          info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type);
-#endif /* pHYs */
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_tIME) != 0)
-   {
-      png_write_tIME(png_ptr, &(info_ptr->mod_time));
-      png_ptr->mode |= PNG_WROTE_tIME;
-   }
-#endif /* tIME */
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-   if ((info_ptr->valid & PNG_INFO_sPLT) != 0)
-      for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
-         png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
-#endif /* sPLT */
-
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-   /* Check to see if we need to write text chunks */
-   for (i = 0; i < info_ptr->num_text; i++)
-   {
-      png_debug2(2, "Writing header text chunk %d, type %d", i,
-          info_ptr->text[i].compression);
-      /* An internationalized chunk? */
-      if (info_ptr->text[i].compression > 0)
-      {
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-         /* Write international chunk */
-         png_write_iTXt(png_ptr,
-             info_ptr->text[i].compression,
-             info_ptr->text[i].key,
-             info_ptr->text[i].lang,
-             info_ptr->text[i].lang_key,
-             info_ptr->text[i].text);
-         /* Mark this chunk as written */
-         if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-         else
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-#else
-         png_warning(png_ptr, "Unable to write international text");
-#endif
-      }
-
-      /* If we want a compressed text chunk */
-      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
-      {
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-         /* Write compressed chunk */
-         png_write_zTXt(png_ptr, info_ptr->text[i].key,
-             info_ptr->text[i].text, info_ptr->text[i].compression);
-         /* Mark this chunk as written */
-         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-#else
-         png_warning(png_ptr, "Unable to write compressed text");
-#endif
-      }
-
-      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-      {
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-         /* Write uncompressed chunk */
-         png_write_tEXt(png_ptr, info_ptr->text[i].key,
-             info_ptr->text[i].text,
-             0);
-         /* Mark this chunk as written */
-         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-#else
-         /* Can't get here */
-         png_warning(png_ptr, "Unable to write uncompressed text");
-#endif
-      }
-   }
-#endif /* tEXt */
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_PLTE);
-#endif
-}
-
-/* Writes the end of the PNG file.  If you don't want to write comments or
- * time information, you can pass NULL for info.  If you already wrote these
- * in png_write_info(), do not write them again here.  If you have long
- * comments, I suggest writing them here, and compressing them.
- */
-void PNGAPI
-png_write_end(png_structrp png_ptr, png_inforp info_ptr)
-{
-   png_debug(1, "in png_write_end");
-
-   if (png_ptr == NULL)
-      return;
-
-   if ((png_ptr->mode & PNG_HAVE_IDAT) == 0)
-      png_error(png_ptr, "No IDATs written into file");
-
-#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-       png_ptr->num_palette_max >= png_ptr->num_palette)
-      png_benign_error(png_ptr, "Wrote palette index exceeding num_palette");
-#endif
-
-   /* See if user wants us to write information chunks */
-   if (info_ptr != NULL)
-   {
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-      int i; /* local index variable */
-#endif
-#ifdef PNG_WRITE_tIME_SUPPORTED
-      /* Check to see if user has supplied a time chunk */
-      if ((info_ptr->valid & PNG_INFO_tIME) != 0 &&
-          (png_ptr->mode & PNG_WROTE_tIME) == 0)
-         png_write_tIME(png_ptr, &(info_ptr->mod_time));
-
-#endif
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-      /* Loop through comment chunks */
-      for (i = 0; i < info_ptr->num_text; i++)
-      {
-         png_debug2(2, "Writing trailer text chunk %d, type %d", i,
-             info_ptr->text[i].compression);
-         /* An internationalized chunk? */
-         if (info_ptr->text[i].compression > 0)
-         {
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-            /* Write international chunk */
-            png_write_iTXt(png_ptr,
-                info_ptr->text[i].compression,
-                info_ptr->text[i].key,
-                info_ptr->text[i].lang,
-                info_ptr->text[i].lang_key,
-                info_ptr->text[i].text);
-            /* Mark this chunk as written */
-            if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-               info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-            else
-               info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-#else
-            png_warning(png_ptr, "Unable to write international text");
-#endif
-         }
-
-         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
-         {
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-            /* Write compressed chunk */
-            png_write_zTXt(png_ptr, info_ptr->text[i].key,
-                info_ptr->text[i].text, info_ptr->text[i].compression);
-            /* Mark this chunk as written */
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-#else
-            png_warning(png_ptr, "Unable to write compressed text");
-#endif
-         }
-
-         else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-         {
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-            /* Write uncompressed chunk */
-            png_write_tEXt(png_ptr, info_ptr->text[i].key,
-                info_ptr->text[i].text, 0);
-            /* Mark this chunk as written */
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-#else
-            png_warning(png_ptr, "Unable to write uncompressed text");
-#endif
-         }
-      }
-#endif
-
-#ifdef PNG_WRITE_eXIf_SUPPORTED
-      if ((info_ptr->valid & PNG_INFO_eXIf) != 0 &&
-          (png_ptr->mode & PNG_WROTE_eXIf) == 0)
-         png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif);
-#endif
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-      write_unknown_chunks(png_ptr, info_ptr, PNG_AFTER_IDAT);
-#endif
-   }
-
-   png_ptr->mode |= PNG_AFTER_IDAT;
-
-   /* Write end of PNG file */
-   png_write_IEND(png_ptr);
-
-   /* This flush, added in libpng-1.0.8, removed from libpng-1.0.9beta03,
-    * and restored again in libpng-1.2.30, may cause some applications that
-    * do not set png_ptr->output_flush_fn to crash.  If your application
-    * experiences a problem, please try building libpng with
-    * PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED defined, and report the event to
-    * png-mng-implement at lists.sf.net .
-    */
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-#  ifdef PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED
-   png_flush(png_ptr);
-#  endif
-#endif
-}
-
-#ifdef PNG_CONVERT_tIME_SUPPORTED
-void PNGAPI
-png_convert_from_struct_tm(png_timep ptime, const struct tm * ttime)
-{
-   png_debug(1, "in png_convert_from_struct_tm");
-
-   ptime->year = (png_uint_16)(1900 + ttime->tm_year);
-   ptime->month = (png_byte)(ttime->tm_mon + 1);
-   ptime->day = (png_byte)ttime->tm_mday;
-   ptime->hour = (png_byte)ttime->tm_hour;
-   ptime->minute = (png_byte)ttime->tm_min;
-   ptime->second = (png_byte)ttime->tm_sec;
-}
-
-void PNGAPI
-png_convert_from_time_t(png_timep ptime, time_t ttime)
-{
-   struct tm *tbuf;
-
-   png_debug(1, "in png_convert_from_time_t");
-
-   tbuf = gmtime(&ttime);
-   if (tbuf == NULL)
-   {
-      /* TODO: add a safe function which takes a png_ptr argument and raises
-       * a png_error if the ttime argument is invalid and the call to gmtime
-       * fails as a consequence.
-       */
-      memset(ptime, 0, sizeof(*ptime));
-      return;
-   }
-
-   png_convert_from_struct_tm(ptime, tbuf);
-}
-#endif
-
-/* Initialize png_ptr structure, and allocate any memory needed */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_write_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
-{
-#ifndef PNG_USER_MEM_SUPPORTED
-   png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
-       error_fn, warn_fn, NULL, NULL, NULL);
-#else
-   return png_create_write_struct_2(user_png_ver, error_ptr, error_fn,
-       warn_fn, NULL, NULL, NULL);
-}
-
-/* Alternate initialize png_ptr structure, and allocate any memory needed */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_write_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
-{
-   png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
-       error_fn, warn_fn, mem_ptr, malloc_fn, free_fn);
-#endif /* USER_MEM */
-   if (png_ptr != NULL)
-   {
-      /* Set the zlib control values to defaults; they can be overridden by the
-       * application after the struct has been created.
-       */
-      png_ptr->zbuffer_size = PNG_ZBUF_SIZE;
-
-      /* The 'zlib_strategy' setting is irrelevant because png_default_claim in
-       * pngwutil.c defaults it according to whether or not filters will be
-       * used, and ignores this setting.
-       */
-      png_ptr->zlib_strategy = PNG_Z_DEFAULT_STRATEGY;
-      png_ptr->zlib_level = PNG_Z_DEFAULT_COMPRESSION;
-      png_ptr->zlib_mem_level = 8;
-      png_ptr->zlib_window_bits = 15;
-      png_ptr->zlib_method = 8;
-
-#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-      png_ptr->zlib_text_strategy = PNG_TEXT_Z_DEFAULT_STRATEGY;
-      png_ptr->zlib_text_level = PNG_TEXT_Z_DEFAULT_COMPRESSION;
-      png_ptr->zlib_text_mem_level = 8;
-      png_ptr->zlib_text_window_bits = 15;
-      png_ptr->zlib_text_method = 8;
-#endif /* WRITE_COMPRESSED_TEXT */
-
-      /* This is a highly dubious configuration option; by default it is off,
-       * but it may be appropriate for private builds that are testing
-       * extensions not conformant to the current specification, or of
-       * applications that must not fail to write at all costs!
-       */
-#ifdef PNG_BENIGN_WRITE_ERRORS_SUPPORTED
-      /* In stable builds only warn if an application error can be completely
-       * handled.
-       */
-      png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN;
-#endif
-
-      /* App warnings are warnings in release (or release candidate) builds but
-       * are errors during development.
-       */
-#if PNG_RELEASE_BUILD
-      png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN;
-#endif
-
-      /* TODO: delay this, it can be done in png_init_io() (if the app doesn't
-       * do it itself) avoiding setting the default function if it is not
-       * required.
-       */
-      png_set_write_fn(png_ptr, NULL, NULL, NULL);
-   }
-
-   return png_ptr;
-}
-
-
-/* Write a few rows of image data.  If the image is interlaced,
- * either you will have to write the 7 sub images, or, if you
- * have called png_set_interlace_handling(), you will have to
- * "write" the image seven times.
- */
-void PNGAPI
-png_write_rows(png_structrp png_ptr, png_bytepp row,
-    png_uint_32 num_rows)
-{
-   png_uint_32 i; /* row counter */
-   png_bytepp rp; /* row pointer */
-
-   png_debug(1, "in png_write_rows");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Loop through the rows */
-   for (i = 0, rp = row; i < num_rows; i++, rp++)
-   {
-      png_write_row(png_ptr, *rp);
-   }
-}
-
-/* Write the image.  You only need to call this function once, even
- * if you are writing an interlaced image.
- */
-void PNGAPI
-png_write_image(png_structrp png_ptr, png_bytepp image)
-{
-   png_uint_32 i; /* row index */
-   int pass, num_pass; /* pass variables */
-   png_bytepp rp; /* points to current row */
-
-   if (png_ptr == NULL)
-      return;
-
-   png_debug(1, "in png_write_image");
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Initialize interlace handling.  If image is not interlaced,
-    * this will set pass to 1
-    */
-   num_pass = png_set_interlace_handling(png_ptr);
-#else
-   num_pass = 1;
-#endif
-   /* Loop through passes */
-   for (pass = 0; pass < num_pass; pass++)
-   {
-      /* Loop through image */
-      for (i = 0, rp = image; i < png_ptr->height; i++, rp++)
-      {
-         png_write_row(png_ptr, *rp);
-      }
-   }
-}
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-/* Performs intrapixel differencing  */
-static void
-png_do_write_intrapixel(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_write_intrapixel");
-
-   if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      int bytes_per_pixel;
-      png_uint_32 row_width = row_info->width;
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep rp;
-         png_uint_32 i;
-
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 3;
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 4;
-
-         else
-            return;
-
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            *(rp)     = (png_byte)(*rp       - *(rp + 1));
-            *(rp + 2) = (png_byte)(*(rp + 2) - *(rp + 1));
-         }
-      }
-
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      else if (row_info->bit_depth == 16)
-      {
-         png_bytep rp;
-         png_uint_32 i;
-
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 6;
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 8;
-
-         else
-            return;
-
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            png_uint_32 s0   = (png_uint_32)(*(rp    ) << 8) | *(rp + 1);
-            png_uint_32 s1   = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3);
-            png_uint_32 s2   = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5);
-            png_uint_32 red  = (png_uint_32)((s0 - s1) & 0xffffL);
-            png_uint_32 blue = (png_uint_32)((s2 - s1) & 0xffffL);
-            *(rp    ) = (png_byte)(red >> 8);
-            *(rp + 1) = (png_byte)red;
-            *(rp + 4) = (png_byte)(blue >> 8);
-            *(rp + 5) = (png_byte)blue;
-         }
-      }
-#endif /* WRITE_16BIT */
-   }
-}
-#endif /* MNG_FEATURES */
-
-/* Called by user to write a row of image data */
-void PNGAPI
-png_write_row(png_structrp png_ptr, png_const_bytep row)
-{
-   /* 1.5.6: moved from png_struct to be a local structure: */
-   png_row_info row_info;
-
-   png_debug2(1, "in png_write_row (row %u, pass %d)",
-       png_ptr->row_number, png_ptr->pass);
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Initialize transformations and other stuff if first time */
-   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
-   {
-      /* Make sure we wrote the header info */
-      if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0)
-         png_error(png_ptr,
-             "png_write_info was never called before png_write_row");
-
-      /* Check for transforms that have been set but were defined out */
-#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED)
-      if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
-         png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED)
-      if ((png_ptr->transformations & PNG_FILLER) != 0)
-         png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined");
-#endif
-#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
-    defined(PNG_READ_PACKSWAP_SUPPORTED)
-      if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-         png_warning(png_ptr,
-             "PNG_WRITE_PACKSWAP_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED)
-      if ((png_ptr->transformations & PNG_PACK) != 0)
-         png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED)
-      if ((png_ptr->transformations & PNG_SHIFT) != 0)
-         png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED)
-      if ((png_ptr->transformations & PNG_BGR) != 0)
-         png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED)
-      if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
-         png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined");
-#endif
-
-      png_write_start_row(png_ptr);
-   }
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced and not interested in row, return */
-   if (png_ptr->interlaced != 0 &&
-       (png_ptr->transformations & PNG_INTERLACE) != 0)
-   {
-      switch (png_ptr->pass)
-      {
-         case 0:
-            if ((png_ptr->row_number & 0x07) != 0)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 1:
-            if ((png_ptr->row_number & 0x07) != 0 || png_ptr->width < 5)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 2:
-            if ((png_ptr->row_number & 0x07) != 4)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 3:
-            if ((png_ptr->row_number & 0x03) != 0 || png_ptr->width < 3)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 4:
-            if ((png_ptr->row_number & 0x03) != 2)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 5:
-            if ((png_ptr->row_number & 0x01) != 0 || png_ptr->width < 2)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 6:
-            if ((png_ptr->row_number & 0x01) == 0)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         default: /* error: ignore it */
-            break;
-      }
-   }
-#endif
-
-   /* Set up row info for transformations */
-   row_info.color_type = png_ptr->color_type;
-   row_info.width = png_ptr->usr_width;
-   row_info.channels = png_ptr->usr_channels;
-   row_info.bit_depth = png_ptr->usr_bit_depth;
-   row_info.pixel_depth = (png_byte)(row_info.bit_depth * row_info.channels);
-   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
-
-   png_debug1(3, "row_info->color_type = %d", row_info.color_type);
-   png_debug1(3, "row_info->width = %u", row_info.width);
-   png_debug1(3, "row_info->channels = %d", row_info.channels);
-   png_debug1(3, "row_info->bit_depth = %d", row_info.bit_depth);
-   png_debug1(3, "row_info->pixel_depth = %d", row_info.pixel_depth);
-   png_debug1(3, "row_info->rowbytes = %lu", (unsigned long)row_info.rowbytes);
-
-   /* Copy user's row into buffer, leaving room for filter byte. */
-   memcpy(png_ptr->row_buf + 1, row, row_info.rowbytes);
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Handle interlacing */
-   if (png_ptr->interlaced && png_ptr->pass < 6 &&
-       (png_ptr->transformations & PNG_INTERLACE) != 0)
-   {
-      png_do_write_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass);
-      /* This should always get caught above, but still ... */
-      if (row_info.width == 0)
-      {
-         png_write_finish_row(png_ptr);
-         return;
-      }
-   }
-#endif
-
-#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-   /* Handle other transformations */
-   if (png_ptr->transformations != 0)
-      png_do_write_transformations(png_ptr, &row_info);
-#endif
-
-   /* At this point the row_info pixel depth must match the 'transformed' depth,
-    * which is also the output depth.
-    */
-   if (row_info.pixel_depth != png_ptr->pixel_depth ||
-       row_info.pixel_depth != png_ptr->transformed_pixel_depth)
-      png_error(png_ptr, "internal write transform logic error");
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   /* Write filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not write a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
-       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
-   {
-      /* Intrapixel differencing */
-      png_do_write_intrapixel(&row_info, png_ptr->row_buf + 1);
-   }
-#endif
-
-/* Added at libpng-1.5.10 */
-#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Check for out-of-range palette index */
-   if (row_info.color_type == PNG_COLOR_TYPE_PALETTE &&
-       png_ptr->num_palette_max >= 0)
-      png_do_check_palette_indexes(png_ptr, &row_info);
-#endif
-
-   /* Find a filter if necessary, filter the row and write it out. */
-   png_write_find_filter(png_ptr, &row_info);
-
-   if (png_ptr->write_row_fn != NULL)
-      (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
-}
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-/* Set the automatic flush interval or 0 to turn flushing off */
-void PNGAPI
-png_set_flush(png_structrp png_ptr, int nrows)
-{
-   png_debug(1, "in png_set_flush");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flush_dist = (nrows < 0 ? 0 : (png_uint_32)nrows);
-}
-
-/* Flush the current output buffers now */
-void PNGAPI
-png_write_flush(png_structrp png_ptr)
-{
-   png_debug(1, "in png_write_flush");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* We have already written out all of the data */
-   if (png_ptr->row_number >= png_ptr->num_rows)
-      return;
-
-   png_compress_IDAT(png_ptr, NULL, 0, Z_SYNC_FLUSH);
-   png_ptr->flush_rows = 0;
-   png_flush(png_ptr);
-}
-#endif /* WRITE_FLUSH */
-
-/* Free any memory used in png_ptr struct without freeing the struct itself. */
-static void
-png_write_destroy(png_structrp png_ptr)
-{
-   png_debug(1, "in png_write_destroy");
-
-   /* Free any memory zlib uses */
-   if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
-      deflateEnd(&png_ptr->zstream);
-
-   /* Free our memory.  png_free checks NULL for us. */
-   png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
-   png_free(png_ptr, png_ptr->row_buf);
-   png_ptr->row_buf = NULL;
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   png_free(png_ptr, png_ptr->prev_row);
-   png_free(png_ptr, png_ptr->try_row);
-   png_free(png_ptr, png_ptr->tst_row);
-   png_ptr->prev_row = NULL;
-   png_ptr->try_row = NULL;
-   png_ptr->tst_row = NULL;
-#endif
-
-#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
-   png_free(png_ptr, png_ptr->chunk_list);
-   png_ptr->chunk_list = NULL;
-#endif
-
-   /* The error handling and memory handling information is left intact at this
-    * point: the jmp_buf may still have to be freed.  See png_destroy_png_struct
-    * for how this happens.
-    */
-}
-
-/* Free all memory used by the write.
- * In libpng 1.6.0 this API changed quietly to no longer accept a NULL value for
- * *png_ptr_ptr.  Prior to 1.6.0 it would accept such a value and it would free
- * the passed in info_structs but it would quietly fail to free any of the data
- * inside them.  In 1.6.0 it quietly does nothing (it has to be quiet because it
- * has no png_ptr.)
- */
-void PNGAPI
-png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
-{
-   png_debug(1, "in png_destroy_write_struct");
-
-   if (png_ptr_ptr != NULL)
-   {
-      png_structrp png_ptr = *png_ptr_ptr;
-
-      if (png_ptr != NULL) /* added in libpng 1.6.0 */
-      {
-         png_destroy_info_struct(png_ptr, info_ptr_ptr);
-
-         *png_ptr_ptr = NULL;
-         png_write_destroy(png_ptr);
-         png_destroy_png_struct(png_ptr);
-      }
-   }
-}
-
-/* Allow the application to select one or more row filters to use. */
-void PNGAPI
-png_set_filter(png_structrp png_ptr, int method, int filters)
-{
-   png_debug(1, "in png_set_filter");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
-       (method == PNG_INTRAPIXEL_DIFFERENCING))
-      method = PNG_FILTER_TYPE_BASE;
-
-#endif
-   if (method == PNG_FILTER_TYPE_BASE)
-   {
-      switch (filters & (PNG_ALL_FILTERS | 0x07))
-      {
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-         case 5:
-         case 6:
-         case 7: png_app_error(png_ptr, "Unknown row filter for method 0");
-#endif /* WRITE_FILTER */
-            /* FALLTHROUGH */
-         case PNG_FILTER_VALUE_NONE:
-            png_ptr->do_filter = PNG_FILTER_NONE; break;
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-         case PNG_FILTER_VALUE_SUB:
-            png_ptr->do_filter = PNG_FILTER_SUB; break;
-
-         case PNG_FILTER_VALUE_UP:
-            png_ptr->do_filter = PNG_FILTER_UP; break;
-
-         case PNG_FILTER_VALUE_AVG:
-            png_ptr->do_filter = PNG_FILTER_AVG; break;
-
-         case PNG_FILTER_VALUE_PAETH:
-            png_ptr->do_filter = PNG_FILTER_PAETH; break;
-
-         default:
-            png_ptr->do_filter = (png_byte)filters; break;
-#else
-         default:
-            png_app_error(png_ptr, "Unknown row filter for method 0");
-#endif /* WRITE_FILTER */
-      }
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-      /* If we have allocated the row_buf, this means we have already started
-       * with the image and we should have allocated all of the filter buffers
-       * that have been selected.  If prev_row isn't already allocated, then
-       * it is too late to start using the filters that need it, since we
-       * will be missing the data in the previous row.  If an application
-       * wants to start and stop using particular filters during compression,
-       * it should start out with all of the filters, and then remove them
-       * or add them back after the start of compression.
-       *
-       * NOTE: this is a nasty constraint on the code, because it means that the
-       * prev_row buffer must be maintained even if there are currently no
-       * 'prev_row' requiring filters active.
-       */
-      if (png_ptr->row_buf != NULL)
-      {
-         int num_filters;
-         png_alloc_size_t buf_size;
-
-         /* Repeat the checks in png_write_start_row; 1 pixel high or wide
-          * images cannot benefit from certain filters.  If this isn't done here
-          * the check below will fire on 1 pixel high images.
-          */
-         if (png_ptr->height == 1)
-            filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
-
-         if (png_ptr->width == 1)
-            filters &= ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH);
-
-         if ((filters & (PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH)) != 0
-            && png_ptr->prev_row == NULL)
-         {
-            /* This is the error case, however it is benign - the previous row
-             * is not available so the filter can't be used.  Just warn here.
-             */
-            png_app_warning(png_ptr,
-                "png_set_filter: UP/AVG/PAETH cannot be added after start");
-            filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
-         }
-
-         num_filters = 0;
-
-         if (filters & PNG_FILTER_SUB)
-            num_filters++;
-
-         if (filters & PNG_FILTER_UP)
-            num_filters++;
-
-         if (filters & PNG_FILTER_AVG)
-            num_filters++;
-
-         if (filters & PNG_FILTER_PAETH)
-            num_filters++;
-
-         /* Allocate needed row buffers if they have not already been
-          * allocated.
-          */
-         buf_size = PNG_ROWBYTES(png_ptr->usr_channels * png_ptr->usr_bit_depth,
-             png_ptr->width) + 1;
-
-         if (png_ptr->try_row == NULL)
-            png_ptr->try_row = png_voidcast(png_bytep,
-                png_malloc(png_ptr, buf_size));
-
-         if (num_filters > 1)
-         {
-            if (png_ptr->tst_row == NULL)
-               png_ptr->tst_row = png_voidcast(png_bytep,
-                   png_malloc(png_ptr, buf_size));
-         }
-      }
-      png_ptr->do_filter = (png_byte)filters;
-#endif
-   }
-   else
-      png_error(png_ptr, "Unknown custom filter method");
-}
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */
-/* Provide floating and fixed point APIs */
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_filter_heuristics(png_structrp png_ptr, int heuristic_method,
-    int num_weights, png_const_doublep filter_weights,
-    png_const_doublep filter_costs)
-{
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(heuristic_method)
-   PNG_UNUSED(num_weights)
-   PNG_UNUSED(filter_weights)
-   PNG_UNUSED(filter_costs)
-}
-#endif /* FLOATING_POINT */
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-void PNGAPI
-png_set_filter_heuristics_fixed(png_structrp png_ptr, int heuristic_method,
-    int num_weights, png_const_fixed_point_p filter_weights,
-    png_const_fixed_point_p filter_costs)
-{
-   PNG_UNUSED(png_ptr)
-   PNG_UNUSED(heuristic_method)
-   PNG_UNUSED(num_weights)
-   PNG_UNUSED(filter_weights)
-   PNG_UNUSED(filter_costs)
-}
-#endif /* FIXED_POINT */
-#endif /* WRITE_WEIGHTED_FILTER */
-
-#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
-void PNGAPI
-png_set_compression_level(png_structrp png_ptr, int level)
-{
-   png_debug(1, "in png_set_compression_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->zlib_level = level;
-}
-
-void PNGAPI
-png_set_compression_mem_level(png_structrp png_ptr, int mem_level)
-{
-   png_debug(1, "in png_set_compression_mem_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->zlib_mem_level = mem_level;
-}
-
-void PNGAPI
-png_set_compression_strategy(png_structrp png_ptr, int strategy)
-{
-   png_debug(1, "in png_set_compression_strategy");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* The flag setting here prevents the libpng dynamic selection of strategy.
-    */
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY;
-   png_ptr->zlib_strategy = strategy;
-}
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-void PNGAPI
-png_set_compression_window_bits(png_structrp png_ptr, int window_bits)
-{
-   png_debug(1, "in png_set_compression_window_bits");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Prior to 1.6.0 this would warn but then set the window_bits value. This
-    * meant that negative window bits values could be selected that would cause
-    * libpng to write a non-standard PNG file with raw deflate or gzip
-    * compressed IDAT or ancillary chunks.  Such files can be read and there is
-    * no warning on read, so this seems like a very bad idea.
-    */
-   if (window_bits > 15)
-   {
-      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
-      window_bits = 15;
-   }
-
-   else if (window_bits < 8)
-   {
-      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
-      window_bits = 8;
-   }
-
-   png_ptr->zlib_window_bits = window_bits;
-}
-
-void PNGAPI
-png_set_compression_method(png_structrp png_ptr, int method)
-{
-   png_debug(1, "in png_set_compression_method");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* This would produce an invalid PNG file if it worked, but it doesn't and
-    * deflate will fault it, so it is harmless to just warn here.
-    */
-   if (method != 8)
-      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
-
-   png_ptr->zlib_method = method;
-}
-#endif /* WRITE_CUSTOMIZE_COMPRESSION */
-
-/* The following were added to libpng-1.5.4 */
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-void PNGAPI
-png_set_text_compression_level(png_structrp png_ptr, int level)
-{
-   png_debug(1, "in png_set_text_compression_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->zlib_text_level = level;
-}
-
-void PNGAPI
-png_set_text_compression_mem_level(png_structrp png_ptr, int mem_level)
-{
-   png_debug(1, "in png_set_text_compression_mem_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->zlib_text_mem_level = mem_level;
-}
-
-void PNGAPI
-png_set_text_compression_strategy(png_structrp png_ptr, int strategy)
-{
-   png_debug(1, "in png_set_text_compression_strategy");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->zlib_text_strategy = strategy;
-}
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-void PNGAPI
-png_set_text_compression_window_bits(png_structrp png_ptr, int window_bits)
-{
-   png_debug(1, "in png_set_text_compression_window_bits");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (window_bits > 15)
-   {
-      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
-      window_bits = 15;
-   }
-
-   else if (window_bits < 8)
-   {
-      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
-      window_bits = 8;
-   }
-
-   png_ptr->zlib_text_window_bits = window_bits;
-}
-
-void PNGAPI
-png_set_text_compression_method(png_structrp png_ptr, int method)
-{
-   png_debug(1, "in png_set_text_compression_method");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (method != 8)
-      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
-
-   png_ptr->zlib_text_method = method;
-}
-#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */
-/* end of API added to libpng-1.5.4 */
-
-void PNGAPI
-png_set_write_status_fn(png_structrp png_ptr, png_write_status_ptr write_row_fn)
-{
-   png_debug(1, "in png_set_write_status_fn");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->write_row_fn = write_row_fn;
-}
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-void PNGAPI
-png_set_write_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr
-    write_user_transform_fn)
-{
-   png_debug(1, "in png_set_write_user_transform_fn");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_USER_TRANSFORM;
-   png_ptr->write_user_transform_fn = write_user_transform_fn;
-}
-#endif
-
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_write_png(png_structrp png_ptr, png_inforp info_ptr,
-    int transforms, voidp params)
-{
-   png_debug(1, "in png_write_png");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if ((info_ptr->valid & PNG_INFO_IDAT) == 0)
-   {
-      png_app_error(png_ptr, "no rows for png_write_image to write");
-      return;
-   }
-
-   /* Write the file header information. */
-   png_write_info(png_ptr, info_ptr);
-
-   /* ------ these transformations don't touch the info structure ------- */
-
-   /* Invert monochrome pixels */
-   if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0)
-#ifdef PNG_WRITE_INVERT_SUPPORTED
-      png_set_invert_mono(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported");
-#endif
-
-   /* Shift the pixels up to a legal bit depth and fill in
-    * as appropriate to correctly scale the image.
-    */
-   if ((transforms & PNG_TRANSFORM_SHIFT) != 0)
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-      if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
-         png_set_shift(png_ptr, &info_ptr->sig_bit);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported");
-#endif
-
-   /* Pack pixels into bytes */
-   if ((transforms & PNG_TRANSFORM_PACKING) != 0)
-#ifdef PNG_WRITE_PACK_SUPPORTED
-      png_set_packing(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported");
-#endif
-
-   /* Swap location of alpha bytes from ARGB to RGBA */
-   if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0)
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-      png_set_swap_alpha(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported");
-#endif
-
-   /* Remove a filler (X) from XRGB/RGBX/AG/GA into to convert it into
-    * RGB, note that the code expects the input color type to be G or RGB; no
-    * alpha channel.
-    */
-   if ((transforms & (PNG_TRANSFORM_STRIP_FILLER_AFTER|
-       PNG_TRANSFORM_STRIP_FILLER_BEFORE)) != 0)
-   {
-#ifdef PNG_WRITE_FILLER_SUPPORTED
-      if ((transforms & PNG_TRANSFORM_STRIP_FILLER_AFTER) != 0)
-      {
-         if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0)
-            png_app_error(png_ptr,
-                "PNG_TRANSFORM_STRIP_FILLER: BEFORE+AFTER not supported");
-
-         /* Continue if ignored - this is the pre-1.6.10 behavior */
-         png_set_filler(png_ptr, 0, PNG_FILLER_AFTER);
-      }
-
-      else if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0)
-         png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_FILLER not supported");
-#endif
-   }
-
-   /* Flip BGR pixels to RGB */
-   if ((transforms & PNG_TRANSFORM_BGR) != 0)
-#ifdef PNG_WRITE_BGR_SUPPORTED
-      png_set_bgr(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported");
-#endif
-
-   /* Swap bytes of 16-bit files to most significant byte first */
-   if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0)
-#ifdef PNG_WRITE_SWAP_SUPPORTED
-      png_set_swap(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported");
-#endif
-
-   /* Swap bits of 1-bit, 2-bit, 4-bit packed pixel formats */
-   if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0)
-#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
-      png_set_packswap(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported");
-#endif
-
-   /* Invert the alpha channel from opacity to transparency */
-   if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0)
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-      png_set_invert_alpha(png_ptr);
-#else
-      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported");
-#endif
-
-   /* ----------------------- end of transformations ------------------- */
-
-   /* Write the bits */
-   png_write_image(png_ptr, info_ptr->row_pointers);
-
-   /* It is REQUIRED to call this to finish writing the rest of the file */
-   png_write_end(png_ptr, info_ptr);
-
-   PNG_UNUSED(params)
-}
-#endif
-
-
-#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
-/* Initialize the write structure - general purpose utility. */
-static int
-png_image_write_init(png_imagep image)
-{
-   png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, image,
-       png_safe_error, png_safe_warning);
-
-   if (png_ptr != NULL)
-   {
-      png_infop info_ptr = png_create_info_struct(png_ptr);
-
-      if (info_ptr != NULL)
-      {
-         png_controlp control = png_voidcast(png_controlp,
-             png_malloc_warn(png_ptr, (sizeof *control)));
-
-         if (control != NULL)
-         {
-            memset(control, 0, (sizeof *control));
-
-            control->png_ptr = png_ptr;
-            control->info_ptr = info_ptr;
-            control->for_write = 1;
-
-            image->opaque = control;
-            return 1;
-         }
-
-         /* Error clean up */
-         png_destroy_info_struct(png_ptr, &info_ptr);
-      }
-
-      png_destroy_write_struct(&png_ptr, NULL);
-   }
-
-   return png_image_error(image, "png_image_write_: out of memory");
-}
-
-/* Arguments to png_image_write_main: */
-typedef struct
-{
-   /* Arguments: */
-   png_imagep      image;
-   png_const_voidp buffer;
-   png_int_32      row_stride;
-   png_const_voidp colormap;
-   int             convert_to_8bit;
-   /* Local variables: */
-   png_const_voidp first_row;
-   ptrdiff_t       row_bytes;
-   png_voidp       local_row;
-   /* Byte count for memory writing */
-   png_bytep        memory;
-   png_alloc_size_t memory_bytes; /* not used for STDIO */
-   png_alloc_size_t output_bytes; /* running total */
-} png_image_write_control;
-
-/* Write png_uint_16 input to a 16-bit PNG; the png_ptr has already been set to
- * do any necessary byte swapping.  The component order is defined by the
- * png_image format value.
- */
-static int
-png_write_image_16bit(png_voidp argument)
-{
-   png_image_write_control *display = png_voidcast(png_image_write_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-
-   png_const_uint_16p input_row = png_voidcast(png_const_uint_16p,
-       display->first_row);
-   png_uint_16p output_row = png_voidcast(png_uint_16p, display->local_row);
-   png_uint_16p row_end;
-   unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ?
-       3 : 1;
-   int aindex = 0;
-   png_uint_32 y = image->height;
-
-   if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
-   {
-#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
-      if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
-      {
-         aindex = -1;
-         ++input_row; /* To point to the first component */
-         ++output_row;
-      }
-         else
-            aindex = (int)channels;
-#     else
-         aindex = (int)channels;
-#     endif
-   }
-
-   else
-      png_error(png_ptr, "png_write_image: internal call error");
-
-   /* Work out the output row end and count over this, note that the increment
-    * above to 'row' means that row_end can actually be beyond the end of the
-    * row; this is correct.
-    */
-   row_end = output_row + image->width * (channels+1);
-
-   for (; y > 0; --y)
-   {
-      png_const_uint_16p in_ptr = input_row;
-      png_uint_16p out_ptr = output_row;
-
-      while (out_ptr < row_end)
-      {
-         png_uint_16 alpha = in_ptr[aindex];
-         png_uint_32 reciprocal = 0;
-         int c;
-
-         out_ptr[aindex] = alpha;
-
-         /* Calculate a reciprocal.  The correct calculation is simply
-          * component/alpha*65535 << 15. (I.e. 15 bits of precision); this
-          * allows correct rounding by adding .5 before the shift.  'reciprocal'
-          * is only initialized when required.
-          */
-         if (alpha > 0 && alpha < 65535)
-            reciprocal = ((0xffff<<15)+(alpha>>1))/alpha;
-
-         c = (int)channels;
-         do /* always at least one channel */
-         {
-            png_uint_16 component = *in_ptr++;
-
-            /* The following gives 65535 for an alpha of 0, which is fine,
-             * otherwise if 0/0 is represented as some other value there is more
-             * likely to be a discontinuity which will probably damage
-             * compression when moving from a fully transparent area to a
-             * nearly transparent one.  (The assumption here is that opaque
-             * areas tend not to be 0 intensity.)
-             */
-            if (component >= alpha)
-               component = 65535;
-
-            /* component<alpha, so component/alpha is less than one and
-             * component*reciprocal is less than 2^31.
-             */
-            else if (component > 0 && alpha < 65535)
-            {
-               png_uint_32 calc = component * reciprocal;
-               calc += 16384; /* round to nearest */
-               component = (png_uint_16)(calc >> 15);
-            }
-
-            *out_ptr++ = component;
-         }
-         while (--c > 0);
-
-         /* Skip to next component (skip the intervening alpha channel) */
-         ++in_ptr;
-         ++out_ptr;
-      }
-
-      png_write_row(png_ptr, png_voidcast(png_const_bytep, display->local_row));
-      input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
-   }
-
-   return 1;
-}
-
-/* Given 16-bit input (1 to 4 channels) write 8-bit output.  If an alpha channel
- * is present it must be removed from the components, the components are then
- * written in sRGB encoding.  No components are added or removed.
- *
- * Calculate an alpha reciprocal to reverse pre-multiplication.  As above the
- * calculation can be done to 15 bits of accuracy; however, the output needs to
- * be scaled in the range 0..255*65535, so include that scaling here.
- */
-#   define UNP_RECIPROCAL(alpha) ((((0xffff*0xff)<<7)+((alpha)>>1))/(alpha))
-
-static png_byte
-png_unpremultiply(png_uint_32 component, png_uint_32 alpha,
-    png_uint_32 reciprocal/*from the above macro*/)
-{
-   /* The following gives 1.0 for an alpha of 0, which is fine, otherwise if 0/0
-    * is represented as some other value there is more likely to be a
-    * discontinuity which will probably damage compression when moving from a
-    * fully transparent area to a nearly transparent one.  (The assumption here
-    * is that opaque areas tend not to be 0 intensity.)
-    *
-    * There is a rounding problem here; if alpha is less than 128 it will end up
-    * as 0 when scaled to 8 bits.  To avoid introducing spurious colors into the
-    * output change for this too.
-    */
-   if (component >= alpha || alpha < 128)
-      return 255;
-
-   /* component<alpha, so component/alpha is less than one and
-    * component*reciprocal is less than 2^31.
-    */
-   else if (component > 0)
-   {
-      /* The test is that alpha/257 (rounded) is less than 255, the first value
-       * that becomes 255 is 65407.
-       * NOTE: this must agree with the PNG_DIV257 macro (which must, therefore,
-       * be exact!)  [Could also test reciprocal != 0]
-       */
-      if (alpha < 65407)
-      {
-         component *= reciprocal;
-         component += 64; /* round to nearest */
-         component >>= 7;
-      }
-
-      else
-         component *= 255;
-
-      /* Convert the component to sRGB. */
-      return (png_byte)PNG_sRGB_FROM_LINEAR(component);
-   }
-
-   else
-      return 0;
-}
-
-static int
-png_write_image_8bit(png_voidp argument)
-{
-   png_image_write_control *display = png_voidcast(png_image_write_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-
-   png_const_uint_16p input_row = png_voidcast(png_const_uint_16p,
-       display->first_row);
-   png_bytep output_row = png_voidcast(png_bytep, display->local_row);
-   png_uint_32 y = image->height;
-   unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ?
-       3 : 1;
-
-   if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
-   {
-      png_bytep row_end;
-      int aindex;
-
-#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
-      if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
-      {
-         aindex = -1;
-         ++input_row; /* To point to the first component */
-         ++output_row;
-      }
-
-      else
-#   endif
-      aindex = (int)channels;
-
-      /* Use row_end in place of a loop counter: */
-      row_end = output_row + image->width * (channels+1);
-
-      for (; y > 0; --y)
-      {
-         png_const_uint_16p in_ptr = input_row;
-         png_bytep out_ptr = output_row;
-
-         while (out_ptr < row_end)
-         {
-            png_uint_16 alpha = in_ptr[aindex];
-            png_byte alphabyte = (png_byte)PNG_DIV257(alpha);
-            png_uint_32 reciprocal = 0;
-            int c;
-
-            /* Scale and write the alpha channel. */
-            out_ptr[aindex] = alphabyte;
-
-            if (alphabyte > 0 && alphabyte < 255)
-               reciprocal = UNP_RECIPROCAL(alpha);
-
-            c = (int)channels;
-            do /* always at least one channel */
-               *out_ptr++ = png_unpremultiply(*in_ptr++, alpha, reciprocal);
-            while (--c > 0);
-
-            /* Skip to next component (skip the intervening alpha channel) */
-            ++in_ptr;
-            ++out_ptr;
-         } /* while out_ptr < row_end */
-
-         png_write_row(png_ptr, png_voidcast(png_const_bytep,
-             display->local_row));
-         input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
-      } /* while y */
-   }
-
-   else
-   {
-      /* No alpha channel, so the row_end really is the end of the row and it
-       * is sufficient to loop over the components one by one.
-       */
-      png_bytep row_end = output_row + image->width * channels;
-
-      for (; y > 0; --y)
-      {
-         png_const_uint_16p in_ptr = input_row;
-         png_bytep out_ptr = output_row;
-
-         while (out_ptr < row_end)
-         {
-            png_uint_32 component = *in_ptr++;
-
-            component *= 255;
-            *out_ptr++ = (png_byte)PNG_sRGB_FROM_LINEAR(component);
-         }
-
-         png_write_row(png_ptr, output_row);
-         input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
-      }
-   }
-
-   return 1;
-}
-
-static void
-png_image_set_PLTE(png_image_write_control *display)
-{
-   png_imagep image = display->image;
-   const void *cmap = display->colormap;
-   int entries = image->colormap_entries > 256 ? 256 :
-       (int)image->colormap_entries;
-
-   /* NOTE: the caller must check for cmap != NULL and entries != 0 */
-   png_uint_32 format = image->format;
-   unsigned int channels = PNG_IMAGE_SAMPLE_CHANNELS(format);
-
-#   if defined(PNG_FORMAT_BGR_SUPPORTED) &&\
-      defined(PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED)
-      int afirst = (format & PNG_FORMAT_FLAG_AFIRST) != 0 &&
-          (format & PNG_FORMAT_FLAG_ALPHA) != 0;
-#   else
-#     define afirst 0
-#   endif
-
-#   ifdef PNG_FORMAT_BGR_SUPPORTED
-      int bgr = (format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0;
-#   else
-#     define bgr 0
-#   endif
-
-   int i, num_trans;
-   png_color palette[256];
-   png_byte tRNS[256];
-
-   memset(tRNS, 255, (sizeof tRNS));
-   memset(palette, 0, (sizeof palette));
-
-   for (i=num_trans=0; i<entries; ++i)
-   {
-      /* This gets automatically converted to sRGB with reversal of the
-       * pre-multiplication if the color-map has an alpha channel.
-       */
-      if ((format & PNG_FORMAT_FLAG_LINEAR) != 0)
-      {
-         png_const_uint_16p entry = png_voidcast(png_const_uint_16p, cmap);
-
-         entry += (unsigned int)i * channels;
-
-         if ((channels & 1) != 0) /* no alpha */
-         {
-            if (channels >= 3) /* RGB */
-            {
-               palette[i].blue = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
-                   entry[(2 ^ bgr)]);
-               palette[i].green = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
-                   entry[1]);
-               palette[i].red = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
-                   entry[bgr]);
-            }
-
-            else /* Gray */
-               palette[i].blue = palette[i].red = palette[i].green =
-                  (png_byte)PNG_sRGB_FROM_LINEAR(255 * *entry);
-         }
-
-         else /* alpha */
-         {
-            png_uint_16 alpha = entry[afirst ? 0 : channels-1];
-            png_byte alphabyte = (png_byte)PNG_DIV257(alpha);
-            png_uint_32 reciprocal = 0;
-
-            /* Calculate a reciprocal, as in the png_write_image_8bit code above
-             * this is designed to produce a value scaled to 255*65535 when
-             * divided by 128 (i.e. asr 7).
-             */
-            if (alphabyte > 0 && alphabyte < 255)
-               reciprocal = (((0xffff*0xff)<<7)+(alpha>>1))/alpha;
-
-            tRNS[i] = alphabyte;
-            if (alphabyte < 255)
-               num_trans = i+1;
-
-            if (channels >= 3) /* RGB */
-            {
-               palette[i].blue = png_unpremultiply(entry[afirst + (2 ^ bgr)],
-                   alpha, reciprocal);
-               palette[i].green = png_unpremultiply(entry[afirst + 1], alpha,
-                   reciprocal);
-               palette[i].red = png_unpremultiply(entry[afirst + bgr], alpha,
-                   reciprocal);
-            }
-
-            else /* gray */
-               palette[i].blue = palette[i].red = palette[i].green =
-                   png_unpremultiply(entry[afirst], alpha, reciprocal);
-         }
-      }
-
-      else /* Color-map has sRGB values */
-      {
-         png_const_bytep entry = png_voidcast(png_const_bytep, cmap);
-
-         entry += (unsigned int)i * channels;
-
-         switch (channels)
-         {
-            case 4:
-               tRNS[i] = entry[afirst ? 0 : 3];
-               if (tRNS[i] < 255)
-                  num_trans = i+1;
-               /* FALLTHROUGH */
-            case 3:
-               palette[i].blue = entry[afirst + (2 ^ bgr)];
-               palette[i].green = entry[afirst + 1];
-               palette[i].red = entry[afirst + bgr];
-               break;
-
-            case 2:
-               tRNS[i] = entry[1 ^ afirst];
-               if (tRNS[i] < 255)
-                  num_trans = i+1;
-               /* FALLTHROUGH */
-            case 1:
-               palette[i].blue = palette[i].red = palette[i].green =
-                  entry[afirst];
-               break;
-
-            default:
-               break;
-         }
-      }
-   }
-
-#   ifdef afirst
-#     undef afirst
-#   endif
-#   ifdef bgr
-#     undef bgr
-#   endif
-
-   png_set_PLTE(image->opaque->png_ptr, image->opaque->info_ptr, palette,
-       entries);
-
-   if (num_trans > 0)
-      png_set_tRNS(image->opaque->png_ptr, image->opaque->info_ptr, tRNS,
-          num_trans, NULL);
-
-   image->colormap_entries = (png_uint_32)entries;
-}
-
-static int
-png_image_write_main(png_voidp argument)
-{
-   png_image_write_control *display = png_voidcast(png_image_write_control*,
-       argument);
-   png_imagep image = display->image;
-   png_structrp png_ptr = image->opaque->png_ptr;
-   png_inforp info_ptr = image->opaque->info_ptr;
-   png_uint_32 format = image->format;
-
-   /* The following four ints are actually booleans */
-   int colormap = (format & PNG_FORMAT_FLAG_COLORMAP);
-   int linear = !colormap && (format & PNG_FORMAT_FLAG_LINEAR); /* input */
-   int alpha = !colormap && (format & PNG_FORMAT_FLAG_ALPHA);
-   int write_16bit = linear && (display->convert_to_8bit == 0);
-
-#   ifdef PNG_BENIGN_ERRORS_SUPPORTED
-      /* Make sure we error out on any bad situation */
-      png_set_benign_errors(png_ptr, 0/*error*/);
-#   endif
-
-   /* Default the 'row_stride' parameter if required, also check the row stride
-    * and total image size to ensure that they are within the system limits.
-    */
-   {
-      unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format);
-
-      if (image->width <= 0x7fffffffU/channels) /* no overflow */
-      {
-         png_uint_32 check;
-         png_uint_32 png_row_stride = image->width * channels;
-
-         if (display->row_stride == 0)
-            display->row_stride = (png_int_32)/*SAFE*/png_row_stride;
-
-         if (display->row_stride < 0)
-            check = (png_uint_32)(-display->row_stride);
-
-         else
-            check = (png_uint_32)display->row_stride;
-
-         if (check >= png_row_stride)
-         {
-            /* Now check for overflow of the image buffer calculation; this
-             * limits the whole image size to 32 bits for API compatibility with
-             * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro.
-             */
-            if (image->height > 0xffffffffU/png_row_stride)
-               png_error(image->opaque->png_ptr, "memory image too large");
-         }
-
-         else
-            png_error(image->opaque->png_ptr, "supplied row stride too small");
-      }
-
-      else
-         png_error(image->opaque->png_ptr, "image row stride too large");
-   }
-
-   /* Set the required transforms then write the rows in the correct order. */
-   if ((format & PNG_FORMAT_FLAG_COLORMAP) != 0)
-   {
-      if (display->colormap != NULL && image->colormap_entries > 0)
-      {
-         png_uint_32 entries = image->colormap_entries;
-
-         png_set_IHDR(png_ptr, info_ptr, image->width, image->height,
-             entries > 16 ? 8 : (entries > 4 ? 4 : (entries > 2 ? 2 : 1)),
-             PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE,
-             PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
-
-         png_image_set_PLTE(display);
-      }
-
-      else
-         png_error(image->opaque->png_ptr,
-             "no color-map for color-mapped image");
-   }
-
-   else
-      png_set_IHDR(png_ptr, info_ptr, image->width, image->height,
-          write_16bit ? 16 : 8,
-          ((format & PNG_FORMAT_FLAG_COLOR) ? PNG_COLOR_MASK_COLOR : 0) +
-          ((format & PNG_FORMAT_FLAG_ALPHA) ? PNG_COLOR_MASK_ALPHA : 0),
-          PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
-
-   /* Counter-intuitively the data transformations must be called *after*
-    * png_write_info, not before as in the read code, but the 'set' functions
-    * must still be called before.  Just set the color space information, never
-    * write an interlaced image.
-    */
-
-   if (write_16bit != 0)
-   {
-      /* The gamma here is 1.0 (linear) and the cHRM chunk matches sRGB. */
-      png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_LINEAR);
-
-      if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0)
-         png_set_cHRM_fixed(png_ptr, info_ptr,
-             /* color      x       y */
-             /* white */ 31270, 32900,
-             /* red   */ 64000, 33000,
-             /* green */ 30000, 60000,
-             /* blue  */ 15000,  6000
-         );
-   }
-
-   else if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0)
-      png_set_sRGB(png_ptr, info_ptr, PNG_sRGB_INTENT_PERCEPTUAL);
-
-   /* Else writing an 8-bit file and the *colors* aren't sRGB, but the 8-bit
-    * space must still be gamma encoded.
-    */
-   else
-      png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_sRGB_INVERSE);
-
-   /* Write the file header. */
-   png_write_info(png_ptr, info_ptr);
-
-   /* Now set up the data transformations (*after* the header is written),
-    * remove the handled transformations from the 'format' flags for checking.
-    *
-    * First check for a little endian system if writing 16-bit files.
-    */
-   if (write_16bit != 0)
-   {
-      png_uint_16 le = 0x0001;
-
-      if ((*(png_const_bytep) & le) != 0)
-         png_set_swap(png_ptr);
-   }
-
-#   ifdef PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED
-      if ((format & PNG_FORMAT_FLAG_BGR) != 0)
-      {
-         if (colormap == 0 && (format & PNG_FORMAT_FLAG_COLOR) != 0)
-            png_set_bgr(png_ptr);
-         format &= ~PNG_FORMAT_FLAG_BGR;
-      }
-#   endif
-
-#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
-      if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
-      {
-         if (colormap == 0 && (format & PNG_FORMAT_FLAG_ALPHA) != 0)
-            png_set_swap_alpha(png_ptr);
-         format &= ~PNG_FORMAT_FLAG_AFIRST;
-      }
-#   endif
-
-   /* If there are 16 or fewer color-map entries we wrote a lower bit depth
-    * above, but the application data is still byte packed.
-    */
-   if (colormap != 0 && image->colormap_entries <= 16)
-      png_set_packing(png_ptr);
-
-   /* That should have handled all (both) the transforms. */
-   if ((format & ~(png_uint_32)(PNG_FORMAT_FLAG_COLOR | PNG_FORMAT_FLAG_LINEAR |
-         PNG_FORMAT_FLAG_ALPHA | PNG_FORMAT_FLAG_COLORMAP)) != 0)
-      png_error(png_ptr, "png_write_image: unsupported transformation");
-
-   {
-      png_const_bytep row = png_voidcast(png_const_bytep, display->buffer);
-      ptrdiff_t row_bytes = display->row_stride;
-
-      if (linear != 0)
-         row_bytes *= (sizeof (png_uint_16));
-
-      if (row_bytes < 0)
-         row += (image->height-1) * (-row_bytes);
-
-      display->first_row = row;
-      display->row_bytes = row_bytes;
-   }
-
-   /* Apply 'fast' options if the flag is set. */
-   if ((image->flags & PNG_IMAGE_FLAG_FAST) != 0)
-   {
-      png_set_filter(png_ptr, PNG_FILTER_TYPE_BASE, PNG_NO_FILTERS);
-      /* NOTE: determined by experiment using pngstest, this reflects some
-       * balance between the time to write the image once and the time to read
-       * it about 50 times.  The speed-up in pngstest was about 10-20% of the
-       * total (user) time on a heavily loaded system.
-       */
-#   ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
-      png_set_compression_level(png_ptr, 3);
-#   endif
-   }
-
-   /* Check for the cases that currently require a pre-transform on the row
-    * before it is written.  This only applies when the input is 16-bit and
-    * either there is an alpha channel or it is converted to 8-bit.
-    */
-   if ((linear != 0 && alpha != 0 ) ||
-       (colormap == 0 && display->convert_to_8bit != 0))
-   {
-      png_bytep row = png_voidcast(png_bytep, png_malloc(png_ptr,
-          png_get_rowbytes(png_ptr, info_ptr)));
-      int result;
-
-      display->local_row = row;
-      if (write_16bit != 0)
-         result = png_safe_execute(image, png_write_image_16bit, display);
-      else
-         result = png_safe_execute(image, png_write_image_8bit, display);
-      display->local_row = NULL;
-
-      png_free(png_ptr, row);
-
-      /* Skip the 'write_end' on error: */
-      if (result == 0)
-         return 0;
-   }
-
-   /* Otherwise this is the case where the input is in a format currently
-    * supported by the rest of the libpng write code; call it directly.
-    */
-   else
-   {
-      png_const_bytep row = png_voidcast(png_const_bytep, display->first_row);
-      ptrdiff_t row_bytes = display->row_bytes;
-      png_uint_32 y = image->height;
-
-      for (; y > 0; --y)
-      {
-         png_write_row(png_ptr, row);
-         row += row_bytes;
-      }
-   }
-
-   png_write_end(png_ptr, info_ptr);
-   return 1;
-}
-
-
-static void (PNGCBAPI
-image_memory_write)(png_structp png_ptr, png_bytep/*const*/ data, size_t size)
-{
-   png_image_write_control *display = png_voidcast(png_image_write_control*,
-       png_ptr->io_ptr/*backdoor: png_get_io_ptr(png_ptr)*/);
-   png_alloc_size_t ob = display->output_bytes;
-
-   /* Check for overflow; this should never happen: */
-   if (size <= ((png_alloc_size_t)-1) - ob)
-   {
-      /* I don't think libpng ever does this, but just in case: */
-      if (size > 0)
-      {
-         if (display->memory_bytes >= ob+size) /* writing */
-            memcpy(display->memory+ob, data, size);
-
-         /* Always update the size: */
-         display->output_bytes = ob+size;
-      }
-   }
-
-   else
-      png_error(png_ptr, "png_image_write_to_memory: PNG too big");
-}
-
-static void (PNGCBAPI
-image_memory_flush)(png_structp png_ptr)
-{
-   PNG_UNUSED(png_ptr)
-}
-
-static int
-png_image_write_memory(png_voidp argument)
-{
-   png_image_write_control *display = png_voidcast(png_image_write_control*,
-       argument);
-
-   /* The rest of the memory-specific init and write_main in an error protected
-    * environment.  This case needs to use callbacks for the write operations
-    * since libpng has no built in support for writing to memory.
-    */
-   png_set_write_fn(display->image->opaque->png_ptr, display/*io_ptr*/,
-       image_memory_write, image_memory_flush);
-
-   return png_image_write_main(display);
-}
-
-int PNGAPI
-png_image_write_to_memory(png_imagep image, void *memory,
-    png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8bit,
-    const void *buffer, png_int_32 row_stride, const void *colormap)
-{
-   /* Write the image to the given buffer, or count the bytes if it is NULL */
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (memory_bytes != NULL && buffer != NULL)
-      {
-         /* This is to give the caller an easier error detection in the NULL
-          * case and guard against uninitialized variable problems:
-          */
-         if (memory == NULL)
-            *memory_bytes = 0;
-
-         if (png_image_write_init(image) != 0)
-         {
-            png_image_write_control display;
-            int result;
-
-            memset(&display, 0, (sizeof display));
-            display.image = image;
-            display.buffer = buffer;
-            display.row_stride = row_stride;
-            display.colormap = colormap;
-            display.convert_to_8bit = convert_to_8bit;
-            display.memory = png_voidcast(png_bytep, memory);
-            display.memory_bytes = *memory_bytes;
-            display.output_bytes = 0;
-
-            result = png_safe_execute(image, png_image_write_memory, &display);
-            png_image_free(image);
-
-            /* write_memory returns true even if we ran out of buffer. */
-            if (result)
-            {
-               /* On out-of-buffer this function returns '0' but still updates
-                * memory_bytes:
-                */
-               if (memory != NULL && display.output_bytes > *memory_bytes)
-                  result = 0;
-
-               *memory_bytes = display.output_bytes;
-            }
-
-            return result;
-         }
-
-         else
-            return 0;
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_write_to_memory: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_write_to_memory: incorrect PNG_IMAGE_VERSION");
-
-   else
-      return 0;
-}
-
-#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
-int PNGAPI
-png_image_write_to_stdio(png_imagep image, FILE *file, int convert_to_8bit,
-    const void *buffer, png_int_32 row_stride, const void *colormap)
-{
-   /* Write the image to the given (FILE*). */
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (file != NULL && buffer != NULL)
-      {
-         if (png_image_write_init(image) != 0)
-         {
-            png_image_write_control display;
-            int result;
-
-            /* This is slightly evil, but png_init_io doesn't do anything other
-             * than this and we haven't changed the standard IO functions so
-             * this saves a 'safe' function.
-             */
-            image->opaque->png_ptr->io_ptr = file;
-
-            memset(&display, 0, (sizeof display));
-            display.image = image;
-            display.buffer = buffer;
-            display.row_stride = row_stride;
-            display.colormap = colormap;
-            display.convert_to_8bit = convert_to_8bit;
-
-            result = png_safe_execute(image, png_image_write_main, &display);
-            png_image_free(image);
-            return result;
-         }
-
-         else
-            return 0;
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_write_to_stdio: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_write_to_stdio: incorrect PNG_IMAGE_VERSION");
-
-   else
-      return 0;
-}
-
-int PNGAPI
-png_image_write_to_file(png_imagep image, const char *file_name,
-    int convert_to_8bit, const void *buffer, png_int_32 row_stride,
-    const void *colormap)
-{
-   /* Write the image to the named file. */
-   if (image != NULL && image->version == PNG_IMAGE_VERSION)
-   {
-      if (file_name != NULL && buffer != NULL)
-      {
-         FILE *fp = fopen(file_name, "wb");
-
-         if (fp != NULL)
-         {
-            if (png_image_write_to_stdio(image, fp, convert_to_8bit, buffer,
-                row_stride, colormap) != 0)
-            {
-               int error; /* from fflush/fclose */
-
-               /* Make sure the file is flushed correctly. */
-               if (fflush(fp) == 0 && ferror(fp) == 0)
-               {
-                  if (fclose(fp) == 0)
-                     return 1;
-
-                  error = errno; /* from fclose */
-               }
-
-               else
-               {
-                  error = errno; /* from fflush or ferror */
-                  (void)fclose(fp);
-               }
-
-               (void)remove(file_name);
-               /* The image has already been cleaned up; this is just used to
-                * set the error (because the original write succeeded).
-                */
-               return png_image_error(image, strerror(error));
-            }
-
-            else
-            {
-               /* Clean up: just the opened file. */
-               (void)fclose(fp);
-               (void)remove(file_name);
-               return 0;
-            }
-         }
-
-         else
-            return png_image_error(image, strerror(errno));
-      }
-
-      else
-         return png_image_error(image,
-             "png_image_write_to_file: invalid argument");
-   }
-
-   else if (image != NULL)
-      return png_image_error(image,
-          "png_image_write_to_file: incorrect PNG_IMAGE_VERSION");
-
-   else
-      return 0;
-}
-#endif /* SIMPLIFIED_WRITE_STDIO */
-#endif /* SIMPLIFIED_WRITE */
-#endif /* WRITE */
diff --git a/dep/libpng/src/pngwtran.c b/dep/libpng/src/pngwtran.c
deleted file mode 100644
index 49a13c1e9..000000000
--- a/dep/libpng/src/pngwtran.c
+++ /dev/null
@@ -1,575 +0,0 @@
-
-/* pngwtran.c - transforms the data in a row for PNG writers
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_WRITE_SUPPORTED
-#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-
-#ifdef PNG_WRITE_PACK_SUPPORTED
-/* Pack pixels into bytes.  Pass the true bit depth in bit_depth.  The
- * row_info bit depth should be 8 (one pixel per byte).  The channels
- * should be 1 (this only happens on grayscale and paletted images).
- */
-static void
-png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
-{
-   png_debug(1, "in png_do_pack");
-
-   if (row_info->bit_depth == 8 &&
-      row_info->channels == 1)
-   {
-      switch ((int)bit_depth)
-      {
-         case 1:
-         {
-            png_bytep sp, dp;
-            int mask, v;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            sp = row;
-            dp = row;
-            mask = 0x80;
-            v = 0;
-
-            for (i = 0; i < row_width; i++)
-            {
-               if (*sp != 0)
-                  v |= mask;
-
-               sp++;
-
-               if (mask > 1)
-                  mask >>= 1;
-
-               else
-               {
-                  mask = 0x80;
-                  *dp = (png_byte)v;
-                  dp++;
-                  v = 0;
-               }
-            }
-
-            if (mask != 0x80)
-               *dp = (png_byte)v;
-
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp, dp;
-            unsigned int shift;
-            int v;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            sp = row;
-            dp = row;
-            shift = 6;
-            v = 0;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_byte value;
-
-               value = (png_byte)(*sp & 0x03);
-               v |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 6;
-                  *dp = (png_byte)v;
-                  dp++;
-                  v = 0;
-               }
-
-               else
-                  shift -= 2;
-
-               sp++;
-            }
-
-            if (shift != 6)
-               *dp = (png_byte)v;
-
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp, dp;
-            unsigned int shift;
-            int v;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            sp = row;
-            dp = row;
-            shift = 4;
-            v = 0;
-
-            for (i = 0; i < row_width; i++)
-            {
-               png_byte value;
-
-               value = (png_byte)(*sp & 0x0f);
-               v |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 4;
-                  *dp = (png_byte)v;
-                  dp++;
-                  v = 0;
-               }
-
-               else
-                  shift -= 4;
-
-               sp++;
-            }
-
-            if (shift != 4)
-               *dp = (png_byte)v;
-
-            break;
-         }
-
-         default:
-            break;
-      }
-
-      row_info->bit_depth = (png_byte)bit_depth;
-      row_info->pixel_depth = (png_byte)(bit_depth * row_info->channels);
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
-          row_info->width);
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-/* Shift pixel values to take advantage of whole range.  Pass the
- * true number of bits in bit_depth.  The row should be packed
- * according to row_info->bit_depth.  Thus, if you had a row of
- * bit depth 4, but the pixels only had values from 0 to 7, you
- * would pass 3 as bit_depth, and this routine would translate the
- * data to 0 to 15.
- */
-static void
-png_do_shift(png_row_infop row_info, png_bytep row,
-    png_const_color_8p bit_depth)
-{
-   png_debug(1, "in png_do_shift");
-
-   if (row_info->color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      int shift_start[4], shift_dec[4];
-      unsigned int channels = 0;
-
-      if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
-      {
-         shift_start[channels] = row_info->bit_depth - bit_depth->red;
-         shift_dec[channels] = bit_depth->red;
-         channels++;
-
-         shift_start[channels] = row_info->bit_depth - bit_depth->green;
-         shift_dec[channels] = bit_depth->green;
-         channels++;
-
-         shift_start[channels] = row_info->bit_depth - bit_depth->blue;
-         shift_dec[channels] = bit_depth->blue;
-         channels++;
-      }
-
-      else
-      {
-         shift_start[channels] = row_info->bit_depth - bit_depth->gray;
-         shift_dec[channels] = bit_depth->gray;
-         channels++;
-      }
-
-      if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
-      {
-         shift_start[channels] = row_info->bit_depth - bit_depth->alpha;
-         shift_dec[channels] = bit_depth->alpha;
-         channels++;
-      }
-
-      /* With low row depths, could only be grayscale, so one channel */
-      if (row_info->bit_depth < 8)
-      {
-         png_bytep bp = row;
-         size_t i;
-         unsigned int mask;
-         size_t row_bytes = row_info->rowbytes;
-
-         if (bit_depth->gray == 1 && row_info->bit_depth == 2)
-            mask = 0x55;
-
-         else if (row_info->bit_depth == 4 && bit_depth->gray == 3)
-            mask = 0x11;
-
-         else
-            mask = 0xff;
-
-         for (i = 0; i < row_bytes; i++, bp++)
-         {
-            int j;
-            unsigned int v, out;
-
-            v = *bp;
-            out = 0;
-
-            for (j = shift_start[0]; j > -shift_dec[0]; j -= shift_dec[0])
-            {
-               if (j > 0)
-                  out |= v << j;
-
-               else
-                  out |= (v >> (-j)) & mask;
-            }
-
-            *bp = (png_byte)(out & 0xff);
-         }
-      }
-
-      else if (row_info->bit_depth == 8)
-      {
-         png_bytep bp = row;
-         png_uint_32 i;
-         png_uint_32 istop = channels * row_info->width;
-
-         for (i = 0; i < istop; i++, bp++)
-         {
-            unsigned int c = i%channels;
-            int j;
-            unsigned int v, out;
-
-            v = *bp;
-            out = 0;
-
-            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
-            {
-               if (j > 0)
-                  out |= v << j;
-
-               else
-                  out |= v >> (-j);
-            }
-
-            *bp = (png_byte)(out & 0xff);
-         }
-      }
-
-      else
-      {
-         png_bytep bp;
-         png_uint_32 i;
-         png_uint_32 istop = channels * row_info->width;
-
-         for (bp = row, i = 0; i < istop; i++)
-         {
-            unsigned int c = i%channels;
-            int j;
-            unsigned int value, v;
-
-            v = png_get_uint_16(bp);
-            value = 0;
-
-            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
-            {
-               if (j > 0)
-                  value |= v << j;
-
-               else
-                  value |= v >> (-j);
-            }
-            *bp++ = (png_byte)((value >> 8) & 0xff);
-            *bp++ = (png_byte)(value & 0xff);
-         }
-      }
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-static void
-png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_write_swap_alpha");
-
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This converts from ARGB to RGBA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               png_byte save = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = save;
-            }
-         }
-
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         else
-         {
-            /* This converts from AARRGGBB to RRGGBBAA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               png_byte save[2];
-               save[0] = *(sp++);
-               save[1] = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = save[0];
-               *(dp++) = save[1];
-            }
-         }
-#endif /* WRITE_16BIT */
-      }
-
-      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This converts from AG to GA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               png_byte save = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = save;
-            }
-         }
-
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         else
-         {
-            /* This converts from AAGG to GGAA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               png_byte save[2];
-               save[0] = *(sp++);
-               save[1] = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = save[0];
-               *(dp++) = save[1];
-            }
-         }
-#endif /* WRITE_16BIT */
-      }
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-static void
-png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_write_invert_alpha");
-
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This inverts the alpha channel in RGBA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               /* Does nothing
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               */
-               sp+=3; dp = sp;
-               *dp = (png_byte)(255 - *(sp++));
-            }
-         }
-
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         else
-         {
-            /* This inverts the alpha channel in RRGGBBAA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               /* Does nothing
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               */
-               sp+=6; dp = sp;
-               *(dp++) = (png_byte)(255 - *(sp++));
-               *dp     = (png_byte)(255 - *(sp++));
-            }
-         }
-#endif /* WRITE_16BIT */
-      }
-
-      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      {
-         if (row_info->bit_depth == 8)
-         {
-            /* This inverts the alpha channel in GA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               *(dp++) = *(sp++);
-               *(dp++) = (png_byte)(255 - *(sp++));
-            }
-         }
-
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         else
-         {
-            /* This inverts the alpha channel in GGAA */
-            png_bytep sp, dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            for (i = 0, sp = dp = row; i < row_width; i++)
-            {
-               /* Does nothing
-               *(dp++) = *(sp++);
-               *(dp++) = *(sp++);
-               */
-               sp+=2; dp = sp;
-               *(dp++) = (png_byte)(255 - *(sp++));
-               *dp     = (png_byte)(255 - *(sp++));
-            }
-         }
-#endif /* WRITE_16BIT */
-      }
-   }
-}
-#endif
-
-/* Transform the data according to the user's wishes.  The order of
- * transformations is significant.
- */
-void /* PRIVATE */
-png_do_write_transformations(png_structrp png_ptr, png_row_infop row_info)
-{
-   png_debug(1, "in png_do_write_transformations");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
-      if (png_ptr->write_user_transform_fn != NULL)
-         (*(png_ptr->write_user_transform_fn)) /* User write transform
-                                                 function */
-             (png_ptr,  /* png_ptr */
-             row_info,  /* row_info: */
-                /*  png_uint_32 width;       width of row */
-                /*  size_t rowbytes;         number of bytes in row */
-                /*  png_byte color_type;     color type of pixels */
-                /*  png_byte bit_depth;      bit depth of samples */
-                /*  png_byte channels;       number of channels (1-4) */
-                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
-             png_ptr->row_buf + 1);      /* start of pixel data for row */
-#endif
-
-#ifdef PNG_WRITE_FILLER_SUPPORTED
-   if ((png_ptr->transformations & PNG_FILLER) != 0)
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-          !(png_ptr->flags & PNG_FLAG_FILLER_AFTER));
-#endif
-
-#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
-      png_do_packswap(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) != 0)
-      png_do_pack(row_info, png_ptr->row_buf + 1,
-          (png_uint_32)png_ptr->bit_depth);
-#endif
-
-#ifdef PNG_WRITE_SWAP_SUPPORTED
-#  ifdef PNG_16BIT_SUPPORTED
-   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
-      png_do_swap(row_info, png_ptr->row_buf + 1);
-#  endif
-#endif
-
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-   if ((png_ptr->transformations & PNG_SHIFT) != 0)
-      png_do_shift(row_info, png_ptr->row_buf + 1,
-           &(png_ptr->shift));
-#endif
-
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0)
-      png_do_write_swap_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
-      png_do_write_invert_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_BGR_SUPPORTED
-   if ((png_ptr->transformations & PNG_BGR) != 0)
-      png_do_bgr(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_INVERT_SUPPORTED
-   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
-      png_do_invert(row_info, png_ptr->row_buf + 1);
-#endif
-}
-#endif /* WRITE_TRANSFORMS */
-#endif /* WRITE */
diff --git a/dep/libpng/src/pngwutil.c b/dep/libpng/src/pngwutil.c
deleted file mode 100644
index 14cc4ce36..000000000
--- a/dep/libpng/src/pngwutil.c
+++ /dev/null
@@ -1,2781 +0,0 @@
-
-/* pngwutil.c - utilities to write a PNG file
- *
- * Copyright (c) 2018-2024 Cosmin Truta
- * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
- * Copyright (c) 1996-1997 Andreas Dilger
- * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_WRITE_SUPPORTED
-
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-/* Place a 32-bit number into a buffer in PNG byte order.  We work
- * with unsigned numbers for convenience, although one supported
- * ancillary chunk uses signed (two's complement) numbers.
- */
-void PNGAPI
-png_save_uint_32(png_bytep buf, png_uint_32 i)
-{
-   buf[0] = (png_byte)((i >> 24) & 0xffU);
-   buf[1] = (png_byte)((i >> 16) & 0xffU);
-   buf[2] = (png_byte)((i >>  8) & 0xffU);
-   buf[3] = (png_byte)( i        & 0xffU);
-}
-
-/* Place a 16-bit number into a buffer in PNG byte order.
- * The parameter is declared unsigned int, not png_uint_16,
- * just to avoid potential problems on pre-ANSI C compilers.
- */
-void PNGAPI
-png_save_uint_16(png_bytep buf, unsigned int i)
-{
-   buf[0] = (png_byte)((i >> 8) & 0xffU);
-   buf[1] = (png_byte)( i       & 0xffU);
-}
-#endif
-
-/* Simple function to write the signature.  If we have already written
- * the magic bytes of the signature, or more likely, the PNG stream is
- * being embedded into another stream and doesn't need its own signature,
- * we should call png_set_sig_bytes() to tell libpng how many of the
- * bytes have already been written.
- */
-void PNGAPI
-png_write_sig(png_structrp png_ptr)
-{
-   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the signature is being written */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_SIGNATURE;
-#endif
-
-   /* Write the rest of the 8 byte signature */
-   png_write_data(png_ptr, &png_signature[png_ptr->sig_bytes],
-       (size_t)(8 - png_ptr->sig_bytes));
-
-   if (png_ptr->sig_bytes < 3)
-      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
-}
-
-/* Write the start of a PNG chunk.  The type is the chunk type.
- * The total_length is the sum of the lengths of all the data you will be
- * passing in png_write_chunk_data().
- */
-static void
-png_write_chunk_header(png_structrp png_ptr, png_uint_32 chunk_name,
-    png_uint_32 length)
-{
-   png_byte buf[8];
-
-#if defined(PNG_DEBUG) && (PNG_DEBUG > 0)
-   PNG_CSTRING_FROM_CHUNK(buf, chunk_name);
-   png_debug2(0, "Writing %s chunk, length = %lu", buf, (unsigned long)length);
-#endif
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the chunk header is being written.
-    * PNG_IO_CHUNK_HDR requires a single I/O call.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_HDR;
-#endif
-
-   /* Write the length and the chunk name */
-   png_save_uint_32(buf, length);
-   png_save_uint_32(buf + 4, chunk_name);
-   png_write_data(png_ptr, buf, 8);
-
-   /* Put the chunk name into png_ptr->chunk_name */
-   png_ptr->chunk_name = chunk_name;
-
-   /* Reset the crc and run it over the chunk name */
-   png_reset_crc(png_ptr);
-
-   png_calculate_crc(png_ptr, buf + 4, 4);
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that chunk data will (possibly) be written.
-    * PNG_IO_CHUNK_DATA does NOT require a specific number of I/O calls.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_DATA;
-#endif
-}
-
-void PNGAPI
-png_write_chunk_start(png_structrp png_ptr, png_const_bytep chunk_string,
-    png_uint_32 length)
-{
-   png_write_chunk_header(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), length);
-}
-
-/* Write the data of a PNG chunk started with png_write_chunk_header().
- * Note that multiple calls to this function are allowed, and that the
- * sum of the lengths from these calls *must* add up to the total_length
- * given to png_write_chunk_header().
- */
-void PNGAPI
-png_write_chunk_data(png_structrp png_ptr, png_const_bytep data, size_t length)
-{
-   /* Write the data, and run the CRC over it */
-   if (png_ptr == NULL)
-      return;
-
-   if (data != NULL && length > 0)
-   {
-      png_write_data(png_ptr, data, length);
-
-      /* Update the CRC after writing the data,
-       * in case the user I/O routine alters it.
-       */
-      png_calculate_crc(png_ptr, data, length);
-   }
-}
-
-/* Finish a chunk started with png_write_chunk_header(). */
-void PNGAPI
-png_write_chunk_end(png_structrp png_ptr)
-{
-   png_byte buf[4];
-
-   if (png_ptr == NULL) return;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the chunk CRC is being written.
-    * PNG_IO_CHUNK_CRC requires a single I/O function call.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_CRC;
-#endif
-
-   /* Write the crc in a single operation */
-   png_save_uint_32(buf, png_ptr->crc);
-
-   png_write_data(png_ptr, buf, 4);
-}
-
-/* Write a PNG chunk all at once.  The type is an array of ASCII characters
- * representing the chunk name.  The array must be at least 4 bytes in
- * length, and does not need to be null terminated.  To be safe, pass the
- * pre-defined chunk names here, and if you need a new one, define it
- * where the others are defined.  The length is the length of the data.
- * All the data must be present.  If that is not possible, use the
- * png_write_chunk_start(), png_write_chunk_data(), and png_write_chunk_end()
- * functions instead.
- */
-static void
-png_write_complete_chunk(png_structrp png_ptr, png_uint_32 chunk_name,
-    png_const_bytep data, size_t length)
-{
-   if (png_ptr == NULL)
-      return;
-
-   /* On 64-bit architectures 'length' may not fit in a png_uint_32. */
-   if (length > PNG_UINT_31_MAX)
-      png_error(png_ptr, "length exceeds PNG maximum");
-
-   png_write_chunk_header(png_ptr, chunk_name, (png_uint_32)length);
-   png_write_chunk_data(png_ptr, data, length);
-   png_write_chunk_end(png_ptr);
-}
-
-/* This is the API that calls the internal function above. */
-void PNGAPI
-png_write_chunk(png_structrp png_ptr, png_const_bytep chunk_string,
-    png_const_bytep data, size_t length)
-{
-   png_write_complete_chunk(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), data,
-       length);
-}
-
-/* This is used below to find the size of an image to pass to png_deflate_claim,
- * so it only needs to be accurate if the size is less than 16384 bytes (the
- * point at which a lower LZ window size can be used.)
- */
-static png_alloc_size_t
-png_image_size(png_structrp png_ptr)
-{
-   /* Only return sizes up to the maximum of a png_uint_32; do this by limiting
-    * the width and height used to 15 bits.
-    */
-   png_uint_32 h = png_ptr->height;
-
-   if (png_ptr->rowbytes < 32768 && h < 32768)
-   {
-      if (png_ptr->interlaced != 0)
-      {
-         /* Interlacing makes the image larger because of the replication of
-          * both the filter byte and the padding to a byte boundary.
-          */
-         png_uint_32 w = png_ptr->width;
-         unsigned int pd = png_ptr->pixel_depth;
-         png_alloc_size_t cb_base;
-         int pass;
-
-         for (cb_base=0, pass=0; pass<=6; ++pass)
-         {
-            png_uint_32 pw = PNG_PASS_COLS(w, pass);
-
-            if (pw > 0)
-               cb_base += (PNG_ROWBYTES(pd, pw)+1) * PNG_PASS_ROWS(h, pass);
-         }
-
-         return cb_base;
-      }
-
-      else
-         return (png_ptr->rowbytes+1) * h;
-   }
-
-   else
-      return 0xffffffffU;
-}
-
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-   /* This is the code to hack the first two bytes of the deflate stream (the
-    * deflate header) to correct the windowBits value to match the actual data
-    * size.  Note that the second argument is the *uncompressed* size but the
-    * first argument is the *compressed* data (and it must be deflate
-    * compressed.)
-    */
-static void
-optimize_cmf(png_bytep data, png_alloc_size_t data_size)
-{
-   /* Optimize the CMF field in the zlib stream.  The resultant zlib stream is
-    * still compliant to the stream specification.
-    */
-   if (data_size <= 16384) /* else windowBits must be 15 */
-   {
-      unsigned int z_cmf = data[0];  /* zlib compression method and flags */
-
-      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
-      {
-         unsigned int z_cinfo;
-         unsigned int half_z_window_size;
-
-         z_cinfo = z_cmf >> 4;
-         half_z_window_size = 1U << (z_cinfo + 7);
-
-         if (data_size <= half_z_window_size) /* else no change */
-         {
-            unsigned int tmp;
-
-            do
-            {
-               half_z_window_size >>= 1;
-               --z_cinfo;
-            }
-            while (z_cinfo > 0 && data_size <= half_z_window_size);
-
-            z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
-
-            data[0] = (png_byte)z_cmf;
-            tmp = data[1] & 0xe0;
-            tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f;
-            data[1] = (png_byte)tmp;
-         }
-      }
-   }
-}
-#endif /* WRITE_OPTIMIZE_CMF */
-
-/* Initialize the compressor for the appropriate type of compression. */
-static int
-png_deflate_claim(png_structrp png_ptr, png_uint_32 owner,
-    png_alloc_size_t data_size)
-{
-   if (png_ptr->zowner != 0)
-   {
-#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_ERROR_TEXT_SUPPORTED)
-      char msg[64];
-
-      PNG_STRING_FROM_CHUNK(msg, owner);
-      msg[4] = ':';
-      msg[5] = ' ';
-      PNG_STRING_FROM_CHUNK(msg+6, png_ptr->zowner);
-      /* So the message that results is "<chunk> using zstream"; this is an
-       * internal error, but is very useful for debugging.  i18n requirements
-       * are minimal.
-       */
-      (void)png_safecat(msg, (sizeof msg), 10, " using zstream");
-#endif
-#if PNG_RELEASE_BUILD
-         png_warning(png_ptr, msg);
-
-         /* Attempt sane error recovery */
-         if (png_ptr->zowner == png_IDAT) /* don't steal from IDAT */
-         {
-            png_ptr->zstream.msg = PNGZ_MSG_CAST("in use by IDAT");
-            return Z_STREAM_ERROR;
-         }
-
-         png_ptr->zowner = 0;
-#else
-         png_error(png_ptr, msg);
-#endif
-   }
-
-   {
-      int level = png_ptr->zlib_level;
-      int method = png_ptr->zlib_method;
-      int windowBits = png_ptr->zlib_window_bits;
-      int memLevel = png_ptr->zlib_mem_level;
-      int strategy; /* set below */
-      int ret; /* zlib return code */
-
-      if (owner == png_IDAT)
-      {
-         if ((png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY) != 0)
-            strategy = png_ptr->zlib_strategy;
-
-         else if (png_ptr->do_filter != PNG_FILTER_NONE)
-            strategy = PNG_Z_DEFAULT_STRATEGY;
-
-         else
-            strategy = PNG_Z_DEFAULT_NOFILTER_STRATEGY;
-      }
-
-      else
-      {
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-            level = png_ptr->zlib_text_level;
-            method = png_ptr->zlib_text_method;
-            windowBits = png_ptr->zlib_text_window_bits;
-            memLevel = png_ptr->zlib_text_mem_level;
-            strategy = png_ptr->zlib_text_strategy;
-#else
-            /* If customization is not supported the values all come from the
-             * IDAT values except for the strategy, which is fixed to the
-             * default.  (This is the pre-1.6.0 behavior too, although it was
-             * implemented in a very different way.)
-             */
-            strategy = Z_DEFAULT_STRATEGY;
-#endif
-      }
-
-      /* Adjust 'windowBits' down if larger than 'data_size'; to stop this
-       * happening just pass 32768 as the data_size parameter.  Notice that zlib
-       * requires an extra 262 bytes in the window in addition to the data to be
-       * able to see the whole of the data, so if data_size+262 takes us to the
-       * next windowBits size we need to fix up the value later.  (Because even
-       * though deflate needs the extra window, inflate does not!)
-       */
-      if (data_size <= 16384)
-      {
-         /* IMPLEMENTATION NOTE: this 'half_window_size' stuff is only here to
-          * work round a Microsoft Visual C misbehavior which, contrary to C-90,
-          * widens the result of the following shift to 64-bits if (and,
-          * apparently, only if) it is used in a test.
-          */
-         unsigned int half_window_size = 1U << (windowBits-1);
-
-         while (data_size + 262 <= half_window_size)
-         {
-            half_window_size >>= 1;
-            --windowBits;
-         }
-      }
-
-      /* Check against the previous initialized values, if any. */
-      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0 &&
-         (png_ptr->zlib_set_level != level ||
-         png_ptr->zlib_set_method != method ||
-         png_ptr->zlib_set_window_bits != windowBits ||
-         png_ptr->zlib_set_mem_level != memLevel ||
-         png_ptr->zlib_set_strategy != strategy))
-      {
-         if (deflateEnd(&png_ptr->zstream) != Z_OK)
-            png_warning(png_ptr, "deflateEnd failed (ignored)");
-
-         png_ptr->flags &= ~PNG_FLAG_ZSTREAM_INITIALIZED;
-      }
-
-      /* For safety clear out the input and output pointers (currently zlib
-       * doesn't use them on Init, but it might in the future).
-       */
-      png_ptr->zstream.next_in = NULL;
-      png_ptr->zstream.avail_in = 0;
-      png_ptr->zstream.next_out = NULL;
-      png_ptr->zstream.avail_out = 0;
-
-      /* Now initialize if required, setting the new parameters, otherwise just
-       * do a simple reset to the previous parameters.
-       */
-      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
-         ret = deflateReset(&png_ptr->zstream);
-
-      else
-      {
-         ret = deflateInit2(&png_ptr->zstream, level, method, windowBits,
-             memLevel, strategy);
-
-         if (ret == Z_OK)
-            png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED;
-      }
-
-      /* The return code is from either deflateReset or deflateInit2; they have
-       * pretty much the same set of error codes.
-       */
-      if (ret == Z_OK)
-         png_ptr->zowner = owner;
-
-      else
-         png_zstream_error(png_ptr, ret);
-
-      return ret;
-   }
-}
-
-/* Clean up (or trim) a linked list of compression buffers. */
-void /* PRIVATE */
-png_free_buffer_list(png_structrp png_ptr, png_compression_bufferp *listp)
-{
-   png_compression_bufferp list = *listp;
-
-   if (list != NULL)
-   {
-      *listp = NULL;
-
-      do
-      {
-         png_compression_bufferp next = list->next;
-
-         png_free(png_ptr, list);
-         list = next;
-      }
-      while (list != NULL);
-   }
-}
-
-#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-/* This pair of functions encapsulates the operation of (a) compressing a
- * text string, and (b) issuing it later as a series of chunk data writes.
- * The compression_state structure is shared context for these functions
- * set up by the caller to allow access to the relevant local variables.
- *
- * compression_buffer (new in 1.6.0) is just a linked list of zbuffer_size
- * temporary buffers.  From 1.6.0 it is retained in png_struct so that it will
- * be correctly freed in the event of a write error (previous implementations
- * just leaked memory.)
- */
-typedef struct
-{
-   png_const_bytep      input;        /* The uncompressed input data */
-   png_alloc_size_t     input_len;    /* Its length */
-   png_uint_32          output_len;   /* Final compressed length */
-   png_byte             output[1024]; /* First block of output */
-} compression_state;
-
-static void
-png_text_compress_init(compression_state *comp, png_const_bytep input,
-    png_alloc_size_t input_len)
-{
-   comp->input = input;
-   comp->input_len = input_len;
-   comp->output_len = 0;
-}
-
-/* Compress the data in the compression state input */
-static int
-png_text_compress(png_structrp png_ptr, png_uint_32 chunk_name,
-    compression_state *comp, png_uint_32 prefix_len)
-{
-   int ret;
-
-   /* To find the length of the output it is necessary to first compress the
-    * input. The result is buffered rather than using the two-pass algorithm
-    * that is used on the inflate side; deflate is assumed to be slower and a
-    * PNG writer is assumed to have more memory available than a PNG reader.
-    *
-    * IMPLEMENTATION NOTE: the zlib API deflateBound() can be used to find an
-    * upper limit on the output size, but it is always bigger than the input
-    * size so it is likely to be more efficient to use this linked-list
-    * approach.
-    */
-   ret = png_deflate_claim(png_ptr, chunk_name, comp->input_len);
-
-   if (ret != Z_OK)
-      return ret;
-
-   /* Set up the compression buffers, we need a loop here to avoid overflowing a
-    * uInt.  Use ZLIB_IO_MAX to limit the input.  The output is always limited
-    * by the output buffer size, so there is no need to check that.  Since this
-    * is ANSI-C we know that an 'int', hence a uInt, is always at least 16 bits
-    * in size.
-    */
-   {
-      png_compression_bufferp *end = &png_ptr->zbuffer_list;
-      png_alloc_size_t input_len = comp->input_len; /* may be zero! */
-      png_uint_32 output_len;
-
-      /* zlib updates these for us: */
-      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(comp->input);
-      png_ptr->zstream.avail_in = 0; /* Set below */
-      png_ptr->zstream.next_out = comp->output;
-      png_ptr->zstream.avail_out = (sizeof comp->output);
-
-      output_len = png_ptr->zstream.avail_out;
-
-      do
-      {
-         uInt avail_in = ZLIB_IO_MAX;
-
-         if (avail_in > input_len)
-            avail_in = (uInt)input_len;
-
-         input_len -= avail_in;
-
-         png_ptr->zstream.avail_in = avail_in;
-
-         if (png_ptr->zstream.avail_out == 0)
-         {
-            png_compression_buffer *next;
-
-            /* Chunk data is limited to 2^31 bytes in length, so the prefix
-             * length must be counted here.
-             */
-            if (output_len + prefix_len > PNG_UINT_31_MAX)
-            {
-               ret = Z_MEM_ERROR;
-               break;
-            }
-
-            /* Need a new (malloc'ed) buffer, but there may be one present
-             * already.
-             */
-            next = *end;
-            if (next == NULL)
-            {
-               next = png_voidcast(png_compression_bufferp, png_malloc_base
-                  (png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr)));
-
-               if (next == NULL)
-               {
-                  ret = Z_MEM_ERROR;
-                  break;
-               }
-
-               /* Link in this buffer (so that it will be freed later) */
-               next->next = NULL;
-               *end = next;
-            }
-
-            png_ptr->zstream.next_out = next->output;
-            png_ptr->zstream.avail_out = png_ptr->zbuffer_size;
-            output_len += png_ptr->zstream.avail_out;
-
-            /* Move 'end' to the next buffer pointer. */
-            end = &next->next;
-         }
-
-         /* Compress the data */
-         ret = deflate(&png_ptr->zstream,
-             input_len > 0 ? Z_NO_FLUSH : Z_FINISH);
-
-         /* Claw back input data that was not consumed (because avail_in is
-          * reset above every time round the loop).
-          */
-         input_len += png_ptr->zstream.avail_in;
-         png_ptr->zstream.avail_in = 0; /* safety */
-      }
-      while (ret == Z_OK);
-
-      /* There may be some space left in the last output buffer. This needs to
-       * be subtracted from output_len.
-       */
-      output_len -= png_ptr->zstream.avail_out;
-      png_ptr->zstream.avail_out = 0; /* safety */
-      comp->output_len = output_len;
-
-      /* Now double check the output length, put in a custom message if it is
-       * too long.  Otherwise ensure the z_stream::msg pointer is set to
-       * something.
-       */
-      if (output_len + prefix_len >= PNG_UINT_31_MAX)
-      {
-         png_ptr->zstream.msg = PNGZ_MSG_CAST("compressed data too long");
-         ret = Z_MEM_ERROR;
-      }
-
-      else
-         png_zstream_error(png_ptr, ret);
-
-      /* Reset zlib for another zTXt/iTXt or image data */
-      png_ptr->zowner = 0;
-
-      /* The only success case is Z_STREAM_END, input_len must be 0; if not this
-       * is an internal error.
-       */
-      if (ret == Z_STREAM_END && input_len == 0)
-      {
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-         /* Fix up the deflate header, if required */
-         optimize_cmf(comp->output, comp->input_len);
-#endif
-         /* But Z_OK is returned, not Z_STREAM_END; this allows the claim
-          * function above to return Z_STREAM_END on an error (though it never
-          * does in the current versions of zlib.)
-          */
-         return Z_OK;
-      }
-
-      else
-         return ret;
-   }
-}
-
-/* Ship the compressed text out via chunk writes */
-static void
-png_write_compressed_data_out(png_structrp png_ptr, compression_state *comp)
-{
-   png_uint_32 output_len = comp->output_len;
-   png_const_bytep output = comp->output;
-   png_uint_32 avail = (sizeof comp->output);
-   png_compression_buffer *next = png_ptr->zbuffer_list;
-
-   for (;;)
-   {
-      if (avail > output_len)
-         avail = output_len;
-
-      png_write_chunk_data(png_ptr, output, avail);
-
-      output_len -= avail;
-
-      if (output_len == 0 || next == NULL)
-         break;
-
-      avail = png_ptr->zbuffer_size;
-      output = next->output;
-      next = next->next;
-   }
-
-   /* This is an internal error; 'next' must have been NULL! */
-   if (output_len > 0)
-      png_error(png_ptr, "error writing ancillary chunked compressed data");
-}
-#endif /* WRITE_COMPRESSED_TEXT */
-
-/* Write the IHDR chunk, and update the png_struct with the necessary
- * information.  Note that the rest of this code depends upon this
- * information being correct.
- */
-void /* PRIVATE */
-png_write_IHDR(png_structrp png_ptr, png_uint_32 width, png_uint_32 height,
-    int bit_depth, int color_type, int compression_type, int filter_type,
-    int interlace_type)
-{
-   png_byte buf[13]; /* Buffer to store the IHDR info */
-   int is_invalid_depth;
-
-   png_debug(1, "in png_write_IHDR");
-
-   /* Check that we have valid input data from the application info */
-   switch (color_type)
-   {
-      case PNG_COLOR_TYPE_GRAY:
-         switch (bit_depth)
-         {
-            case 1:
-            case 2:
-            case 4:
-            case 8:
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-            case 16:
-#endif
-               png_ptr->channels = 1; break;
-
-            default:
-               png_error(png_ptr,
-                   "Invalid bit depth for grayscale image");
-         }
-         break;
-
-      case PNG_COLOR_TYPE_RGB:
-         is_invalid_depth = (bit_depth != 8);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
-#endif
-         if (is_invalid_depth)
-            png_error(png_ptr, "Invalid bit depth for RGB image");
-
-         png_ptr->channels = 3;
-         break;
-
-      case PNG_COLOR_TYPE_PALETTE:
-         switch (bit_depth)
-         {
-            case 1:
-            case 2:
-            case 4:
-            case 8:
-               png_ptr->channels = 1;
-               break;
-
-            default:
-               png_error(png_ptr, "Invalid bit depth for paletted image");
-         }
-         break;
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-         is_invalid_depth = (bit_depth != 8);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
-#endif
-         if (is_invalid_depth)
-            png_error(png_ptr, "Invalid bit depth for grayscale+alpha image");
-
-         png_ptr->channels = 2;
-         break;
-
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-         is_invalid_depth = (bit_depth != 8);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
-#endif
-         if (is_invalid_depth)
-            png_error(png_ptr, "Invalid bit depth for RGBA image");
-
-         png_ptr->channels = 4;
-         break;
-
-      default:
-         png_error(png_ptr, "Invalid image color type specified");
-   }
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Invalid compression type specified");
-      compression_type = PNG_COMPRESSION_TYPE_BASE;
-   }
-
-   /* Write filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not write a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if (
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-       !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
-       ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) &&
-       (color_type == PNG_COLOR_TYPE_RGB ||
-        color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
-       (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) &&
-#endif
-       filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Invalid filter type specified");
-      filter_type = PNG_FILTER_TYPE_BASE;
-   }
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   if (interlace_type != PNG_INTERLACE_NONE &&
-       interlace_type != PNG_INTERLACE_ADAM7)
-   {
-      png_warning(png_ptr, "Invalid interlace type specified");
-      interlace_type = PNG_INTERLACE_ADAM7;
-   }
-#else
-   interlace_type=PNG_INTERLACE_NONE;
-#endif
-
-   /* Save the relevant information */
-   png_ptr->bit_depth = (png_byte)bit_depth;
-   png_ptr->color_type = (png_byte)color_type;
-   png_ptr->interlaced = (png_byte)interlace_type;
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_ptr->filter_type = (png_byte)filter_type;
-#endif
-   png_ptr->compression_type = (png_byte)compression_type;
-   png_ptr->width = width;
-   png_ptr->height = height;
-
-   png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels);
-   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width);
-   /* Set the usr info, so any transformations can modify it */
-   png_ptr->usr_width = png_ptr->width;
-   png_ptr->usr_bit_depth = png_ptr->bit_depth;
-   png_ptr->usr_channels = png_ptr->channels;
-
-   /* Pack the header information into the buffer */
-   png_save_uint_32(buf, width);
-   png_save_uint_32(buf + 4, height);
-   buf[8] = (png_byte)bit_depth;
-   buf[9] = (png_byte)color_type;
-   buf[10] = (png_byte)compression_type;
-   buf[11] = (png_byte)filter_type;
-   buf[12] = (png_byte)interlace_type;
-
-   /* Write the chunk */
-   png_write_complete_chunk(png_ptr, png_IHDR, buf, 13);
-
-   if ((png_ptr->do_filter) == PNG_NO_FILTERS)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
-          png_ptr->bit_depth < 8)
-         png_ptr->do_filter = PNG_FILTER_NONE;
-
-      else
-         png_ptr->do_filter = PNG_ALL_FILTERS;
-   }
-
-   png_ptr->mode = PNG_HAVE_IHDR; /* not READY_FOR_ZTXT */
-}
-
-/* Write the palette.  We are careful not to trust png_color to be in the
- * correct order for PNG, so people can redefine it to any convenient
- * structure.
- */
-void /* PRIVATE */
-png_write_PLTE(png_structrp png_ptr, png_const_colorp palette,
-    png_uint_32 num_pal)
-{
-   png_uint_32 max_palette_length, i;
-   png_const_colorp pal_ptr;
-   png_byte buf[3];
-
-   png_debug(1, "in png_write_PLTE");
-
-   max_palette_length = (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ?
-      (1 << png_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH;
-
-   if ((
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-       (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0 &&
-#endif
-       num_pal == 0) || num_pal > max_palette_length)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         png_error(png_ptr, "Invalid number of colors in palette");
-      }
-
-      else
-      {
-         png_warning(png_ptr, "Invalid number of colors in palette");
-         return;
-      }
-   }
-
-   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
-   {
-      png_warning(png_ptr,
-          "Ignoring request to write a PLTE chunk in grayscale PNG");
-
-      return;
-   }
-
-   png_ptr->num_palette = (png_uint_16)num_pal;
-   png_debug1(3, "num_palette = %d", png_ptr->num_palette);
-
-   png_write_chunk_header(png_ptr, png_PLTE, (png_uint_32)(num_pal * 3));
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-
-   for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++)
-   {
-      buf[0] = pal_ptr->red;
-      buf[1] = pal_ptr->green;
-      buf[2] = pal_ptr->blue;
-      png_write_chunk_data(png_ptr, buf, 3);
-   }
-
-#else
-   /* This is a little slower but some buggy compilers need to do this
-    * instead
-    */
-   pal_ptr=palette;
-
-   for (i = 0; i < num_pal; i++)
-   {
-      buf[0] = pal_ptr[i].red;
-      buf[1] = pal_ptr[i].green;
-      buf[2] = pal_ptr[i].blue;
-      png_write_chunk_data(png_ptr, buf, 3);
-   }
-
-#endif
-   png_write_chunk_end(png_ptr);
-   png_ptr->mode |= PNG_HAVE_PLTE;
-}
-
-/* This is similar to png_text_compress, above, except that it does not require
- * all of the data at once and, instead of buffering the compressed result,
- * writes it as IDAT chunks.  Unlike png_text_compress it *can* png_error out
- * because it calls the write interface.  As a result it does its own error
- * reporting and does not return an error code.  In the event of error it will
- * just call png_error.  The input data length may exceed 32-bits.  The 'flush'
- * parameter is exactly the same as that to deflate, with the following
- * meanings:
- *
- * Z_NO_FLUSH: normal incremental output of compressed data
- * Z_SYNC_FLUSH: do a SYNC_FLUSH, used by png_write_flush
- * Z_FINISH: this is the end of the input, do a Z_FINISH and clean up
- *
- * The routine manages the acquire and release of the png_ptr->zstream by
- * checking and (at the end) clearing png_ptr->zowner; it does some sanity
- * checks on the 'mode' flags while doing this.
- */
-void /* PRIVATE */
-png_compress_IDAT(png_structrp png_ptr, png_const_bytep input,
-    png_alloc_size_t input_len, int flush)
-{
-   if (png_ptr->zowner != png_IDAT)
-   {
-      /* First time.   Ensure we have a temporary buffer for compression and
-       * trim the buffer list if it has more than one entry to free memory.
-       * If 'WRITE_COMPRESSED_TEXT' is not set the list will never have been
-       * created at this point, but the check here is quick and safe.
-       */
-      if (png_ptr->zbuffer_list == NULL)
-      {
-         png_ptr->zbuffer_list = png_voidcast(png_compression_bufferp,
-             png_malloc(png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr)));
-         png_ptr->zbuffer_list->next = NULL;
-      }
-
-      else
-         png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list->next);
-
-      /* It is a terminal error if we can't claim the zstream. */
-      if (png_deflate_claim(png_ptr, png_IDAT, png_image_size(png_ptr)) != Z_OK)
-         png_error(png_ptr, png_ptr->zstream.msg);
-
-      /* The output state is maintained in png_ptr->zstream, so it must be
-       * initialized here after the claim.
-       */
-      png_ptr->zstream.next_out = png_ptr->zbuffer_list->output;
-      png_ptr->zstream.avail_out = png_ptr->zbuffer_size;
-   }
-
-   /* Now loop reading and writing until all the input is consumed or an error
-    * terminates the operation.  The _out values are maintained across calls to
-    * this function, but the input must be reset each time.
-    */
-   png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
-   png_ptr->zstream.avail_in = 0; /* set below */
-   for (;;)
-   {
-      int ret;
-
-      /* INPUT: from the row data */
-      uInt avail = ZLIB_IO_MAX;
-
-      if (avail > input_len)
-         avail = (uInt)input_len; /* safe because of the check */
-
-      png_ptr->zstream.avail_in = avail;
-      input_len -= avail;
-
-      ret = deflate(&png_ptr->zstream, input_len > 0 ? Z_NO_FLUSH : flush);
-
-      /* Include as-yet unconsumed input */
-      input_len += png_ptr->zstream.avail_in;
-      png_ptr->zstream.avail_in = 0;
-
-      /* OUTPUT: write complete IDAT chunks when avail_out drops to zero. Note
-       * that these two zstream fields are preserved across the calls, therefore
-       * there is no need to set these up on entry to the loop.
-       */
-      if (png_ptr->zstream.avail_out == 0)
-      {
-         png_bytep data = png_ptr->zbuffer_list->output;
-         uInt size = png_ptr->zbuffer_size;
-
-         /* Write an IDAT containing the data then reset the buffer.  The
-          * first IDAT may need deflate header optimization.
-          */
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-            if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 &&
-                png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
-               optimize_cmf(data, png_image_size(png_ptr));
-#endif
-
-         if (size > 0)
-            png_write_complete_chunk(png_ptr, png_IDAT, data, size);
-         png_ptr->mode |= PNG_HAVE_IDAT;
-
-         png_ptr->zstream.next_out = data;
-         png_ptr->zstream.avail_out = size;
-
-         /* For SYNC_FLUSH or FINISH it is essential to keep calling zlib with
-          * the same flush parameter until it has finished output, for NO_FLUSH
-          * it doesn't matter.
-          */
-         if (ret == Z_OK && flush != Z_NO_FLUSH)
-            continue;
-      }
-
-      /* The order of these checks doesn't matter much; it just affects which
-       * possible error might be detected if multiple things go wrong at once.
-       */
-      if (ret == Z_OK) /* most likely return code! */
-      {
-         /* If all the input has been consumed then just return.  If Z_FINISH
-          * was used as the flush parameter something has gone wrong if we get
-          * here.
-          */
-         if (input_len == 0)
-         {
-            if (flush == Z_FINISH)
-               png_error(png_ptr, "Z_OK on Z_FINISH with output space");
-
-            return;
-         }
-      }
-
-      else if (ret == Z_STREAM_END && flush == Z_FINISH)
-      {
-         /* This is the end of the IDAT data; any pending output must be
-          * flushed.  For small PNG files we may still be at the beginning.
-          */
-         png_bytep data = png_ptr->zbuffer_list->output;
-         uInt size = png_ptr->zbuffer_size - png_ptr->zstream.avail_out;
-
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-         if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 &&
-             png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
-            optimize_cmf(data, png_image_size(png_ptr));
-#endif
-
-         if (size > 0)
-            png_write_complete_chunk(png_ptr, png_IDAT, data, size);
-         png_ptr->zstream.avail_out = 0;
-         png_ptr->zstream.next_out = NULL;
-         png_ptr->mode |= PNG_HAVE_IDAT | PNG_AFTER_IDAT;
-
-         png_ptr->zowner = 0; /* Release the stream */
-         return;
-      }
-
-      else
-      {
-         /* This is an error condition. */
-         png_zstream_error(png_ptr, ret);
-         png_error(png_ptr, png_ptr->zstream.msg);
-      }
-   }
-}
-
-/* Write an IEND chunk */
-void /* PRIVATE */
-png_write_IEND(png_structrp png_ptr)
-{
-   png_debug(1, "in png_write_IEND");
-
-   png_write_complete_chunk(png_ptr, png_IEND, NULL, 0);
-   png_ptr->mode |= PNG_HAVE_IEND;
-}
-
-#ifdef PNG_WRITE_gAMA_SUPPORTED
-/* Write a gAMA chunk */
-void /* PRIVATE */
-png_write_gAMA_fixed(png_structrp png_ptr, png_fixed_point file_gamma)
-{
-   png_byte buf[4];
-
-   png_debug(1, "in png_write_gAMA");
-
-   /* file_gamma is saved in 1/100,000ths */
-   png_save_uint_32(buf, (png_uint_32)file_gamma);
-   png_write_complete_chunk(png_ptr, png_gAMA, buf, 4);
-}
-#endif
-
-#ifdef PNG_WRITE_sRGB_SUPPORTED
-/* Write a sRGB chunk */
-void /* PRIVATE */
-png_write_sRGB(png_structrp png_ptr, int srgb_intent)
-{
-   png_byte buf[1];
-
-   png_debug(1, "in png_write_sRGB");
-
-   if (srgb_intent >= PNG_sRGB_INTENT_LAST)
-      png_warning(png_ptr,
-          "Invalid sRGB rendering intent specified");
-
-   buf[0]=(png_byte)srgb_intent;
-   png_write_complete_chunk(png_ptr, png_sRGB, buf, 1);
-}
-#endif
-
-#ifdef PNG_WRITE_iCCP_SUPPORTED
-/* Write an iCCP chunk */
-void /* PRIVATE */
-png_write_iCCP(png_structrp png_ptr, png_const_charp name,
-    png_const_bytep profile)
-{
-   png_uint_32 name_len;
-   png_uint_32 profile_len;
-   png_byte new_name[81]; /* 1 byte for the compression byte */
-   compression_state comp;
-   png_uint_32 temp;
-
-   png_debug(1, "in png_write_iCCP");
-
-   /* These are all internal problems: the profile should have been checked
-    * before when it was stored.
-    */
-   if (profile == NULL)
-      png_error(png_ptr, "No profile for iCCP chunk"); /* internal error */
-
-   profile_len = png_get_uint_32(profile);
-
-   if (profile_len < 132)
-      png_error(png_ptr, "ICC profile too short");
-
-   temp = (png_uint_32) (*(profile+8));
-   if (temp > 3 && (profile_len & 0x03))
-      png_error(png_ptr, "ICC profile length invalid (not a multiple of 4)");
-
-   {
-      png_uint_32 embedded_profile_len = png_get_uint_32(profile);
-
-      if (profile_len != embedded_profile_len)
-         png_error(png_ptr, "Profile length does not match profile");
-   }
-
-   name_len = png_check_keyword(png_ptr, name, new_name);
-
-   if (name_len == 0)
-      png_error(png_ptr, "iCCP: invalid keyword");
-
-   new_name[++name_len] = PNG_COMPRESSION_TYPE_BASE;
-
-   /* Make sure we include the NULL after the name and the compression type */
-   ++name_len;
-
-   png_text_compress_init(&comp, profile, profile_len);
-
-   /* Allow for keyword terminator and compression byte */
-   if (png_text_compress(png_ptr, png_iCCP, &comp, name_len) != Z_OK)
-      png_error(png_ptr, png_ptr->zstream.msg);
-
-   png_write_chunk_header(png_ptr, png_iCCP, name_len + comp.output_len);
-
-   png_write_chunk_data(png_ptr, new_name, name_len);
-
-   png_write_compressed_data_out(png_ptr, &comp);
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-/* Write a sPLT chunk */
-void /* PRIVATE */
-png_write_sPLT(png_structrp png_ptr, png_const_sPLT_tp spalette)
-{
-   png_uint_32 name_len;
-   png_byte new_name[80];
-   png_byte entrybuf[10];
-   size_t entry_size = (spalette->depth == 8 ? 6 : 10);
-   size_t palette_size = entry_size * (size_t)spalette->nentries;
-   png_sPLT_entryp ep;
-#ifndef PNG_POINTER_INDEXING_SUPPORTED
-   int i;
-#endif
-
-   png_debug(1, "in png_write_sPLT");
-
-   name_len = png_check_keyword(png_ptr, spalette->name, new_name);
-
-   if (name_len == 0)
-      png_error(png_ptr, "sPLT: invalid keyword");
-
-   /* Make sure we include the NULL after the name */
-   png_write_chunk_header(png_ptr, png_sPLT,
-       (png_uint_32)(name_len + 2 + palette_size));
-
-   png_write_chunk_data(png_ptr, (png_bytep)new_name, (size_t)(name_len + 1));
-
-   png_write_chunk_data(png_ptr, &spalette->depth, 1);
-
-   /* Loop through each palette entry, writing appropriately */
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (ep = spalette->entries; ep<spalette->entries + spalette->nentries; ep++)
-   {
-      if (spalette->depth == 8)
-      {
-         entrybuf[0] = (png_byte)ep->red;
-         entrybuf[1] = (png_byte)ep->green;
-         entrybuf[2] = (png_byte)ep->blue;
-         entrybuf[3] = (png_byte)ep->alpha;
-         png_save_uint_16(entrybuf + 4, ep->frequency);
-      }
-
-      else
-      {
-         png_save_uint_16(entrybuf + 0, ep->red);
-         png_save_uint_16(entrybuf + 2, ep->green);
-         png_save_uint_16(entrybuf + 4, ep->blue);
-         png_save_uint_16(entrybuf + 6, ep->alpha);
-         png_save_uint_16(entrybuf + 8, ep->frequency);
-      }
-
-      png_write_chunk_data(png_ptr, entrybuf, entry_size);
-   }
-#else
-   ep=spalette->entries;
-   for (i = 0; i>spalette->nentries; i++)
-   {
-      if (spalette->depth == 8)
-      {
-         entrybuf[0] = (png_byte)ep[i].red;
-         entrybuf[1] = (png_byte)ep[i].green;
-         entrybuf[2] = (png_byte)ep[i].blue;
-         entrybuf[3] = (png_byte)ep[i].alpha;
-         png_save_uint_16(entrybuf + 4, ep[i].frequency);
-      }
-
-      else
-      {
-         png_save_uint_16(entrybuf + 0, ep[i].red);
-         png_save_uint_16(entrybuf + 2, ep[i].green);
-         png_save_uint_16(entrybuf + 4, ep[i].blue);
-         png_save_uint_16(entrybuf + 6, ep[i].alpha);
-         png_save_uint_16(entrybuf + 8, ep[i].frequency);
-      }
-
-      png_write_chunk_data(png_ptr, entrybuf, entry_size);
-   }
-#endif
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-/* Write the sBIT chunk */
-void /* PRIVATE */
-png_write_sBIT(png_structrp png_ptr, png_const_color_8p sbit, int color_type)
-{
-   png_byte buf[4];
-   size_t size;
-
-   png_debug(1, "in png_write_sBIT");
-
-   /* Make sure we don't depend upon the order of PNG_COLOR_8 */
-   if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      png_byte maxbits;
-
-      maxbits = (png_byte)(color_type==PNG_COLOR_TYPE_PALETTE ? 8 :
-          png_ptr->usr_bit_depth);
-
-      if (sbit->red == 0 || sbit->red > maxbits ||
-          sbit->green == 0 || sbit->green > maxbits ||
-          sbit->blue == 0 || sbit->blue > maxbits)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[0] = sbit->red;
-      buf[1] = sbit->green;
-      buf[2] = sbit->blue;
-      size = 3;
-   }
-
-   else
-   {
-      if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[0] = sbit->gray;
-      size = 1;
-   }
-
-   if ((color_type & PNG_COLOR_MASK_ALPHA) != 0)
-   {
-      if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[size++] = sbit->alpha;
-   }
-
-   png_write_complete_chunk(png_ptr, png_sBIT, buf, size);
-}
-#endif
-
-#ifdef PNG_WRITE_cHRM_SUPPORTED
-/* Write the cHRM chunk */
-void /* PRIVATE */
-png_write_cHRM_fixed(png_structrp png_ptr, const png_xy *xy)
-{
-   png_byte buf[32];
-
-   png_debug(1, "in png_write_cHRM");
-
-   /* Each value is saved in 1/100,000ths */
-   png_save_int_32(buf,      xy->whitex);
-   png_save_int_32(buf +  4, xy->whitey);
-
-   png_save_int_32(buf +  8, xy->redx);
-   png_save_int_32(buf + 12, xy->redy);
-
-   png_save_int_32(buf + 16, xy->greenx);
-   png_save_int_32(buf + 20, xy->greeny);
-
-   png_save_int_32(buf + 24, xy->bluex);
-   png_save_int_32(buf + 28, xy->bluey);
-
-   png_write_complete_chunk(png_ptr, png_cHRM, buf, 32);
-}
-#endif
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-/* Write the tRNS chunk */
-void /* PRIVATE */
-png_write_tRNS(png_structrp png_ptr, png_const_bytep trans_alpha,
-    png_const_color_16p tran, int num_trans, int color_type)
-{
-   png_byte buf[6];
-
-   png_debug(1, "in png_write_tRNS");
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette)
-      {
-         png_app_warning(png_ptr,
-             "Invalid number of transparent colors specified");
-         return;
-      }
-
-      /* Write the chunk out as it is */
-      png_write_complete_chunk(png_ptr, png_tRNS, trans_alpha,
-          (size_t)num_trans);
-   }
-
-   else if (color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      /* One 16-bit value */
-      if (tran->gray >= (1 << png_ptr->bit_depth))
-      {
-         png_app_warning(png_ptr,
-             "Ignoring attempt to write tRNS chunk out-of-range for bit_depth");
-
-         return;
-      }
-
-      png_save_uint_16(buf, tran->gray);
-      png_write_complete_chunk(png_ptr, png_tRNS, buf, 2);
-   }
-
-   else if (color_type == PNG_COLOR_TYPE_RGB)
-   {
-      /* Three 16-bit values */
-      png_save_uint_16(buf, tran->red);
-      png_save_uint_16(buf + 2, tran->green);
-      png_save_uint_16(buf + 4, tran->blue);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0)
-#else
-      if ((buf[0] | buf[2] | buf[4]) != 0)
-#endif
-      {
-         png_app_warning(png_ptr,
-             "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8");
-         return;
-      }
-
-      png_write_complete_chunk(png_ptr, png_tRNS, buf, 6);
-   }
-
-   else
-   {
-      png_app_warning(png_ptr, "Can't write tRNS with an alpha channel");
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-/* Write the background chunk */
-void /* PRIVATE */
-png_write_bKGD(png_structrp png_ptr, png_const_color_16p back, int color_type)
-{
-   png_byte buf[6];
-
-   png_debug(1, "in png_write_bKGD");
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-          (png_ptr->num_palette != 0 ||
-          (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0) &&
-#endif
-         back->index >= png_ptr->num_palette)
-      {
-         png_warning(png_ptr, "Invalid background palette index");
-         return;
-      }
-
-      buf[0] = back->index;
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, 1);
-   }
-
-   else if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
-   {
-      png_save_uint_16(buf, back->red);
-      png_save_uint_16(buf + 2, back->green);
-      png_save_uint_16(buf + 4, back->blue);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0)
-#else
-      if ((buf[0] | buf[2] | buf[4]) != 0)
-#endif
-      {
-         png_warning(png_ptr,
-             "Ignoring attempt to write 16-bit bKGD chunk "
-             "when bit_depth is 8");
-
-         return;
-      }
-
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, 6);
-   }
-
-   else
-   {
-      if (back->gray >= (1 << png_ptr->bit_depth))
-      {
-         png_warning(png_ptr,
-             "Ignoring attempt to write bKGD chunk out-of-range for bit_depth");
-
-         return;
-      }
-
-      png_save_uint_16(buf, back->gray);
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, 2);
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_eXIf_SUPPORTED
-/* Write the Exif data */
-void /* PRIVATE */
-png_write_eXIf(png_structrp png_ptr, png_bytep exif, int num_exif)
-{
-   int i;
-   png_byte buf[1];
-
-   png_debug(1, "in png_write_eXIf");
-
-   png_write_chunk_header(png_ptr, png_eXIf, (png_uint_32)(num_exif));
-
-   for (i = 0; i < num_exif; i++)
-   {
-      buf[0] = exif[i];
-      png_write_chunk_data(png_ptr, buf, 1);
-   }
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-/* Write the histogram */
-void /* PRIVATE */
-png_write_hIST(png_structrp png_ptr, png_const_uint_16p hist, int num_hist)
-{
-   int i;
-   png_byte buf[3];
-
-   png_debug(1, "in png_write_hIST");
-
-   if (num_hist > (int)png_ptr->num_palette)
-   {
-      png_debug2(3, "num_hist = %d, num_palette = %d", num_hist,
-          png_ptr->num_palette);
-
-      png_warning(png_ptr, "Invalid number of histogram entries specified");
-      return;
-   }
-
-   png_write_chunk_header(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
-
-   for (i = 0; i < num_hist; i++)
-   {
-      png_save_uint_16(buf, hist[i]);
-      png_write_chunk_data(png_ptr, buf, 2);
-   }
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-/* Write a tEXt chunk */
-void /* PRIVATE */
-png_write_tEXt(png_structrp png_ptr, png_const_charp key, png_const_charp text,
-    size_t text_len)
-{
-   png_uint_32 key_len;
-   png_byte new_key[80];
-
-   png_debug(1, "in png_write_tEXt");
-
-   key_len = png_check_keyword(png_ptr, key, new_key);
-
-   if (key_len == 0)
-      png_error(png_ptr, "tEXt: invalid keyword");
-
-   if (text == NULL || *text == '\0')
-      text_len = 0;
-
-   else
-      text_len = strlen(text);
-
-   if (text_len > PNG_UINT_31_MAX - (key_len+1))
-      png_error(png_ptr, "tEXt: text too long");
-
-   /* Make sure we include the 0 after the key */
-   png_write_chunk_header(png_ptr, png_tEXt,
-       (png_uint_32)/*checked above*/(key_len + text_len + 1));
-   /*
-    * We leave it to the application to meet PNG-1.0 requirements on the
-    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
-    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
-    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
-    */
-   png_write_chunk_data(png_ptr, new_key, key_len + 1);
-
-   if (text_len != 0)
-      png_write_chunk_data(png_ptr, (png_const_bytep)text, text_len);
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-/* Write a compressed text chunk */
-void /* PRIVATE */
-png_write_zTXt(png_structrp png_ptr, png_const_charp key, png_const_charp text,
-    int compression)
-{
-   png_uint_32 key_len;
-   png_byte new_key[81];
-   compression_state comp;
-
-   png_debug(1, "in png_write_zTXt");
-
-   if (compression == PNG_TEXT_COMPRESSION_NONE)
-   {
-      png_write_tEXt(png_ptr, key, text, 0);
-      return;
-   }
-
-   if (compression != PNG_TEXT_COMPRESSION_zTXt)
-      png_error(png_ptr, "zTXt: invalid compression type");
-
-   key_len = png_check_keyword(png_ptr, key, new_key);
-
-   if (key_len == 0)
-      png_error(png_ptr, "zTXt: invalid keyword");
-
-   /* Add the compression method and 1 for the keyword separator. */
-   new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE;
-   ++key_len;
-
-   /* Compute the compressed data; do it now for the length */
-   png_text_compress_init(&comp, (png_const_bytep)text,
-       text == NULL ? 0 : strlen(text));
-
-   if (png_text_compress(png_ptr, png_zTXt, &comp, key_len) != Z_OK)
-      png_error(png_ptr, png_ptr->zstream.msg);
-
-   /* Write start of chunk */
-   png_write_chunk_header(png_ptr, png_zTXt, key_len + comp.output_len);
-
-   /* Write key */
-   png_write_chunk_data(png_ptr, new_key, key_len);
-
-   /* Write the compressed data */
-   png_write_compressed_data_out(png_ptr, &comp);
-
-   /* Close the chunk */
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-/* Write an iTXt chunk */
-void /* PRIVATE */
-png_write_iTXt(png_structrp png_ptr, int compression, png_const_charp key,
-    png_const_charp lang, png_const_charp lang_key, png_const_charp text)
-{
-   png_uint_32 key_len, prefix_len;
-   size_t lang_len, lang_key_len;
-   png_byte new_key[82];
-   compression_state comp;
-
-   png_debug(1, "in png_write_iTXt");
-
-   key_len = png_check_keyword(png_ptr, key, new_key);
-
-   if (key_len == 0)
-      png_error(png_ptr, "iTXt: invalid keyword");
-
-   /* Set the compression flag */
-   switch (compression)
-   {
-      case PNG_ITXT_COMPRESSION_NONE:
-      case PNG_TEXT_COMPRESSION_NONE:
-         compression = new_key[++key_len] = 0; /* no compression */
-         break;
-
-      case PNG_TEXT_COMPRESSION_zTXt:
-      case PNG_ITXT_COMPRESSION_zTXt:
-         compression = new_key[++key_len] = 1; /* compressed */
-         break;
-
-      default:
-         png_error(png_ptr, "iTXt: invalid compression");
-   }
-
-   new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE;
-   ++key_len; /* for the keywod separator */
-
-   /* We leave it to the application to meet PNG-1.0 requirements on the
-    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
-    * any non-Latin-1 characters except for NEWLINE.  ISO PNG, however,
-    * specifies that the text is UTF-8 and this really doesn't require any
-    * checking.
-    *
-    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
-    *
-    * TODO: validate the language tag correctly (see the spec.)
-    */
-   if (lang == NULL) lang = ""; /* empty language is valid */
-   lang_len = strlen(lang)+1;
-   if (lang_key == NULL) lang_key = ""; /* may be empty */
-   lang_key_len = strlen(lang_key)+1;
-   if (text == NULL) text = ""; /* may be empty */
-
-   prefix_len = key_len;
-   if (lang_len > PNG_UINT_31_MAX-prefix_len)
-      prefix_len = PNG_UINT_31_MAX;
-   else
-      prefix_len = (png_uint_32)(prefix_len + lang_len);
-
-   if (lang_key_len > PNG_UINT_31_MAX-prefix_len)
-      prefix_len = PNG_UINT_31_MAX;
-   else
-      prefix_len = (png_uint_32)(prefix_len + lang_key_len);
-
-   png_text_compress_init(&comp, (png_const_bytep)text, strlen(text));
-
-   if (compression != 0)
-   {
-      if (png_text_compress(png_ptr, png_iTXt, &comp, prefix_len) != Z_OK)
-         png_error(png_ptr, png_ptr->zstream.msg);
-   }
-
-   else
-   {
-      if (comp.input_len > PNG_UINT_31_MAX-prefix_len)
-         png_error(png_ptr, "iTXt: uncompressed text too long");
-
-      /* So the string will fit in a chunk: */
-      comp.output_len = (png_uint_32)/*SAFE*/comp.input_len;
-   }
-
-   png_write_chunk_header(png_ptr, png_iTXt, comp.output_len + prefix_len);
-
-   png_write_chunk_data(png_ptr, new_key, key_len);
-
-   png_write_chunk_data(png_ptr, (png_const_bytep)lang, lang_len);
-
-   png_write_chunk_data(png_ptr, (png_const_bytep)lang_key, lang_key_len);
-
-   if (compression != 0)
-      png_write_compressed_data_out(png_ptr, &comp);
-
-   else
-      png_write_chunk_data(png_ptr, (png_const_bytep)text, comp.output_len);
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-/* Write the oFFs chunk */
-void /* PRIVATE */
-png_write_oFFs(png_structrp png_ptr, png_int_32 x_offset, png_int_32 y_offset,
-    int unit_type)
-{
-   png_byte buf[9];
-
-   png_debug(1, "in png_write_oFFs");
-
-   if (unit_type >= PNG_OFFSET_LAST)
-      png_warning(png_ptr, "Unrecognized unit type for oFFs chunk");
-
-   png_save_int_32(buf, x_offset);
-   png_save_int_32(buf + 4, y_offset);
-   buf[8] = (png_byte)unit_type;
-
-   png_write_complete_chunk(png_ptr, png_oFFs, buf, 9);
-}
-#endif
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-/* Write the pCAL chunk (described in the PNG extensions document) */
-void /* PRIVATE */
-png_write_pCAL(png_structrp png_ptr, png_charp purpose, png_int_32 X0,
-    png_int_32 X1, int type, int nparams, png_const_charp units,
-    png_charpp params)
-{
-   png_uint_32 purpose_len;
-   size_t units_len, total_len;
-   size_t *params_len;
-   png_byte buf[10];
-   png_byte new_purpose[80];
-   int i;
-
-   png_debug1(1, "in png_write_pCAL (%d parameters)", nparams);
-
-   if (type >= PNG_EQUATION_LAST)
-      png_error(png_ptr, "Unrecognized equation type for pCAL chunk");
-
-   purpose_len = png_check_keyword(png_ptr, purpose, new_purpose);
-
-   if (purpose_len == 0)
-      png_error(png_ptr, "pCAL: invalid keyword");
-
-   ++purpose_len; /* terminator */
-
-   png_debug1(3, "pCAL purpose length = %d", (int)purpose_len);
-   units_len = strlen(units) + (nparams == 0 ? 0 : 1);
-   png_debug1(3, "pCAL units length = %d", (int)units_len);
-   total_len = purpose_len + units_len + 10;
-
-   params_len = (size_t *)png_malloc(png_ptr,
-       (png_alloc_size_t)((png_alloc_size_t)nparams * (sizeof (size_t))));
-
-   /* Find the length of each parameter, making sure we don't count the
-    * null terminator for the last parameter.
-    */
-   for (i = 0; i < nparams; i++)
-   {
-      params_len[i] = strlen(params[i]) + (i == nparams - 1 ? 0 : 1);
-      png_debug2(3, "pCAL parameter %d length = %lu", i,
-          (unsigned long)params_len[i]);
-      total_len += params_len[i];
-   }
-
-   png_debug1(3, "pCAL total length = %d", (int)total_len);
-   png_write_chunk_header(png_ptr, png_pCAL, (png_uint_32)total_len);
-   png_write_chunk_data(png_ptr, new_purpose, purpose_len);
-   png_save_int_32(buf, X0);
-   png_save_int_32(buf + 4, X1);
-   buf[8] = (png_byte)type;
-   buf[9] = (png_byte)nparams;
-   png_write_chunk_data(png_ptr, buf, 10);
-   png_write_chunk_data(png_ptr, (png_const_bytep)units, (size_t)units_len);
-
-   for (i = 0; i < nparams; i++)
-   {
-      png_write_chunk_data(png_ptr, (png_const_bytep)params[i], params_len[i]);
-   }
-
-   png_free(png_ptr, params_len);
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-/* Write the sCAL chunk */
-void /* PRIVATE */
-png_write_sCAL_s(png_structrp png_ptr, int unit, png_const_charp width,
-    png_const_charp height)
-{
-   png_byte buf[64];
-   size_t wlen, hlen, total_len;
-
-   png_debug(1, "in png_write_sCAL_s");
-
-   wlen = strlen(width);
-   hlen = strlen(height);
-   total_len = wlen + hlen + 2;
-
-   if (total_len > 64)
-   {
-      png_warning(png_ptr, "Can't write sCAL (buffer too small)");
-      return;
-   }
-
-   buf[0] = (png_byte)unit;
-   memcpy(buf + 1, width, wlen + 1);      /* Append the '\0' here */
-   memcpy(buf + wlen + 2, height, hlen);  /* Do NOT append the '\0' here */
-
-   png_debug1(3, "sCAL total length = %u", (unsigned int)total_len);
-   png_write_complete_chunk(png_ptr, png_sCAL, buf, total_len);
-}
-#endif
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-/* Write the pHYs chunk */
-void /* PRIVATE */
-png_write_pHYs(png_structrp png_ptr, png_uint_32 x_pixels_per_unit,
-    png_uint_32 y_pixels_per_unit,
-    int unit_type)
-{
-   png_byte buf[9];
-
-   png_debug(1, "in png_write_pHYs");
-
-   if (unit_type >= PNG_RESOLUTION_LAST)
-      png_warning(png_ptr, "Unrecognized unit type for pHYs chunk");
-
-   png_save_uint_32(buf, x_pixels_per_unit);
-   png_save_uint_32(buf + 4, y_pixels_per_unit);
-   buf[8] = (png_byte)unit_type;
-
-   png_write_complete_chunk(png_ptr, png_pHYs, buf, 9);
-}
-#endif
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-/* Write the tIME chunk.  Use either png_convert_from_struct_tm()
- * or png_convert_from_time_t(), or fill in the structure yourself.
- */
-void /* PRIVATE */
-png_write_tIME(png_structrp png_ptr, png_const_timep mod_time)
-{
-   png_byte buf[7];
-
-   png_debug(1, "in png_write_tIME");
-
-   if (mod_time->month  > 12 || mod_time->month  < 1 ||
-       mod_time->day    > 31 || mod_time->day    < 1 ||
-       mod_time->hour   > 23 || mod_time->second > 60)
-   {
-      png_warning(png_ptr, "Invalid time specified for tIME chunk");
-      return;
-   }
-
-   png_save_uint_16(buf, mod_time->year);
-   buf[2] = mod_time->month;
-   buf[3] = mod_time->day;
-   buf[4] = mod_time->hour;
-   buf[5] = mod_time->minute;
-   buf[6] = mod_time->second;
-
-   png_write_complete_chunk(png_ptr, png_tIME, buf, 7);
-}
-#endif
-
-/* Initializes the row writing capability of libpng */
-void /* PRIVATE */
-png_write_start_row(png_structrp png_ptr)
-{
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif
-
-   png_alloc_size_t buf_size;
-   int usr_pixel_depth;
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   png_byte filters;
-#endif
-
-   png_debug(1, "in png_write_start_row");
-
-   usr_pixel_depth = png_ptr->usr_channels * png_ptr->usr_bit_depth;
-   buf_size = PNG_ROWBYTES(usr_pixel_depth, png_ptr->width) + 1;
-
-   /* 1.5.6: added to allow checking in the row write code. */
-   png_ptr->transformed_pixel_depth = png_ptr->pixel_depth;
-   png_ptr->maximum_pixel_depth = (png_byte)usr_pixel_depth;
-
-   /* Set up row buffer */
-   png_ptr->row_buf = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
-
-   png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   filters = png_ptr->do_filter;
-
-   if (png_ptr->height == 1)
-      filters &= 0xff & ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
-
-   if (png_ptr->width == 1)
-      filters &= 0xff & ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH);
-
-   if (filters == 0)
-      filters = PNG_FILTER_NONE;
-
-   png_ptr->do_filter = filters;
-
-   if (((filters & (PNG_FILTER_SUB | PNG_FILTER_UP | PNG_FILTER_AVG |
-       PNG_FILTER_PAETH)) != 0) && png_ptr->try_row == NULL)
-   {
-      int num_filters = 0;
-
-      png_ptr->try_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
-
-      if (filters & PNG_FILTER_SUB)
-         num_filters++;
-
-      if (filters & PNG_FILTER_UP)
-         num_filters++;
-
-      if (filters & PNG_FILTER_AVG)
-         num_filters++;
-
-      if (filters & PNG_FILTER_PAETH)
-         num_filters++;
-
-      if (num_filters > 1)
-         png_ptr->tst_row = png_voidcast(png_bytep, png_malloc(png_ptr,
-             buf_size));
-   }
-
-   /* We only need to keep the previous row if we are using one of the following
-    * filters.
-    */
-   if ((filters & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH)) != 0)
-      png_ptr->prev_row = png_voidcast(png_bytep,
-          png_calloc(png_ptr, buf_size));
-#endif /* WRITE_FILTER */
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced, we need to set up width and height of pass */
-   if (png_ptr->interlaced != 0)
-   {
-      if ((png_ptr->transformations & PNG_INTERLACE) == 0)
-      {
-         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
-             png_pass_ystart[0]) / png_pass_yinc[0];
-
-         png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 -
-             png_pass_start[0]) / png_pass_inc[0];
-      }
-
-      else
-      {
-         png_ptr->num_rows = png_ptr->height;
-         png_ptr->usr_width = png_ptr->width;
-      }
-   }
-
-   else
-#endif
-   {
-      png_ptr->num_rows = png_ptr->height;
-      png_ptr->usr_width = png_ptr->width;
-   }
-}
-
-/* Internal use only.  Called when finished processing a row of data. */
-void /* PRIVATE */
-png_write_finish_row(png_structrp png_ptr)
-{
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif
-
-   png_debug(1, "in png_write_finish_row");
-
-   /* Next row */
-   png_ptr->row_number++;
-
-   /* See if we are done */
-   if (png_ptr->row_number < png_ptr->num_rows)
-      return;
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced, go to next pass */
-   if (png_ptr->interlaced != 0)
-   {
-      png_ptr->row_number = 0;
-      if ((png_ptr->transformations & PNG_INTERLACE) != 0)
-      {
-         png_ptr->pass++;
-      }
-
-      else
-      {
-         /* Loop until we find a non-zero width or height pass */
-         do
-         {
-            png_ptr->pass++;
-
-            if (png_ptr->pass >= 7)
-               break;
-
-            png_ptr->usr_width = (png_ptr->width +
-                png_pass_inc[png_ptr->pass] - 1 -
-                png_pass_start[png_ptr->pass]) /
-                png_pass_inc[png_ptr->pass];
-
-            png_ptr->num_rows = (png_ptr->height +
-                png_pass_yinc[png_ptr->pass] - 1 -
-                png_pass_ystart[png_ptr->pass]) /
-                png_pass_yinc[png_ptr->pass];
-
-            if ((png_ptr->transformations & PNG_INTERLACE) != 0)
-               break;
-
-         } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0);
-
-      }
-
-      /* Reset the row above the image for the next pass */
-      if (png_ptr->pass < 7)
-      {
-         if (png_ptr->prev_row != NULL)
-            memset(png_ptr->prev_row, 0,
-                PNG_ROWBYTES(png_ptr->usr_channels *
-                png_ptr->usr_bit_depth, png_ptr->width) + 1);
-
-         return;
-      }
-   }
-#endif
-
-   /* If we get here, we've just written the last row, so we need
-      to flush the compressor */
-   png_compress_IDAT(png_ptr, NULL, 0, Z_FINISH);
-}
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-/* Pick out the correct pixels for the interlace pass.
- * The basic idea here is to go through the row with a source
- * pointer and a destination pointer (sp and dp), and copy the
- * correct pixels for the pass.  As the row gets compacted,
- * sp will always be >= dp, so we should never overwrite anything.
- * See the default: case for the easiest code to understand.
- */
-void /* PRIVATE */
-png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   png_debug(1, "in png_do_write_interlace");
-
-   /* We don't have to do anything on the last pass (6) */
-   if (pass < 6)
-   {
-      /* Each pixel depth is handled separately */
-      switch (row_info->pixel_depth)
-      {
-         case 1:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            unsigned int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            d = 0;
-            shift = 7;
-
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               sp = row + (size_t)(i >> 3);
-               value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 7;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift--;
-
-            }
-            if (shift != 7)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            unsigned int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            shift = 6;
-            d = 0;
-
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               sp = row + (size_t)(i >> 2);
-               value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 6;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift -= 2;
-            }
-            if (shift != 6)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            unsigned int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            shift = 4;
-            d = 0;
-            for (i = png_pass_start[pass]; i < row_width;
-                i += png_pass_inc[pass])
-            {
-               sp = row + (size_t)(i >> 1);
-               value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 4;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift -= 4;
-            }
-            if (shift != 4)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         default:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-            size_t pixel_bytes;
-
-            /* Start at the beginning */
-            dp = row;
-
-            /* Find out how many bytes each pixel takes up */
-            pixel_bytes = (row_info->pixel_depth >> 3);
-
-            /* Loop through the row, only looking at the pixels that matter */
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               /* Find out where the original pixel is */
-               sp = row + (size_t)i * pixel_bytes;
-
-               /* Move the pixel */
-               if (dp != sp)
-                  memcpy(dp, sp, pixel_bytes);
-
-               /* Next pixel */
-               dp += pixel_bytes;
-            }
-            break;
-         }
-      }
-      /* Set new row width */
-      row_info->width = (row_info->width +
-          png_pass_inc[pass] - 1 -
-          png_pass_start[pass]) /
-          png_pass_inc[pass];
-
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
-          row_info->width);
-   }
-}
-#endif
-
-
-/* This filters the row, chooses which filter to use, if it has not already
- * been specified by the application, and then writes the row out with the
- * chosen filter.
- */
-static void /* PRIVATE */
-png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row,
-    size_t row_bytes);
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-static size_t /* PRIVATE */
-png_setup_sub_row(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes, size_t lmins)
-{
-   png_bytep rp, dp, lp;
-   size_t i;
-   size_t sum = 0;
-   unsigned int v;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp;
-        i++, rp++, dp++)
-   {
-      v = *dp = *rp;
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-   }
-
-   for (lp = png_ptr->row_buf + 1; i < row_bytes;
-      i++, rp++, lp++, dp++)
-   {
-      v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-
-      if (sum > lmins)  /* We are already worse, don't continue. */
-        break;
-   }
-
-   return sum;
-}
-
-static void /* PRIVATE */
-png_setup_sub_row_only(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes)
-{
-   png_bytep rp, dp, lp;
-   size_t i;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp;
-        i++, rp++, dp++)
-   {
-      *dp = *rp;
-   }
-
-   for (lp = png_ptr->row_buf + 1; i < row_bytes;
-      i++, rp++, lp++, dp++)
-   {
-      *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
-   }
-}
-
-static size_t /* PRIVATE */
-png_setup_up_row(png_structrp png_ptr, size_t row_bytes, size_t lmins)
-{
-   png_bytep rp, dp, pp;
-   size_t i;
-   size_t sum = 0;
-   unsigned int v;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_UP;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < row_bytes;
-       i++, rp++, pp++, dp++)
-   {
-      v = *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-
-      if (sum > lmins)  /* We are already worse, don't continue. */
-        break;
-   }
-
-   return sum;
-}
-static void /* PRIVATE */
-png_setup_up_row_only(png_structrp png_ptr, size_t row_bytes)
-{
-   png_bytep rp, dp, pp;
-   size_t i;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_UP;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < row_bytes;
-       i++, rp++, pp++, dp++)
-   {
-      *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
-   }
-}
-
-static size_t /* PRIVATE */
-png_setup_avg_row(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes, size_t lmins)
-{
-   png_bytep rp, dp, pp, lp;
-   png_uint_32 i;
-   size_t sum = 0;
-   unsigned int v;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < bpp; i++)
-   {
-      v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
-
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-   }
-
-   for (lp = png_ptr->row_buf + 1; i < row_bytes; i++)
-   {
-      v = *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
-          & 0xff);
-
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-
-      if (sum > lmins)  /* We are already worse, don't continue. */
-        break;
-   }
-
-   return sum;
-}
-static void /* PRIVATE */
-png_setup_avg_row_only(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes)
-{
-   png_bytep rp, dp, pp, lp;
-   png_uint_32 i;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < bpp; i++)
-   {
-      *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
-   }
-
-   for (lp = png_ptr->row_buf + 1; i < row_bytes; i++)
-   {
-      *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
-          & 0xff);
-   }
-}
-
-static size_t /* PRIVATE */
-png_setup_paeth_row(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes, size_t lmins)
-{
-   png_bytep rp, dp, pp, cp, lp;
-   size_t i;
-   size_t sum = 0;
-   unsigned int v;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < bpp; i++)
-   {
-      v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
-
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-   }
-
-   for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes;
-        i++)
-   {
-      int a, b, c, pa, pb, pc, p;
-
-      b = *pp++;
-      c = *cp++;
-      a = *lp++;
-
-      p = b - c;
-      pc = a - c;
-
-#ifdef PNG_USE_ABS
-      pa = abs(p);
-      pb = abs(pc);
-      pc = abs(p + pc);
-#else
-      pa = p < 0 ? -p : p;
-      pb = pc < 0 ? -pc : pc;
-      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-
-      p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
-
-      v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
-
-#ifdef PNG_USE_ABS
-      sum += 128 - abs((int)v - 128);
-#else
-      sum += (v < 128) ? v : 256 - v;
-#endif
-
-      if (sum > lmins)  /* We are already worse, don't continue. */
-        break;
-   }
-
-   return sum;
-}
-static void /* PRIVATE */
-png_setup_paeth_row_only(png_structrp png_ptr, png_uint_32 bpp,
-    size_t row_bytes)
-{
-   png_bytep rp, dp, pp, cp, lp;
-   size_t i;
-
-   png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH;
-
-   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
-       pp = png_ptr->prev_row + 1; i < bpp; i++)
-   {
-      *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
-   }
-
-   for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes;
-        i++)
-   {
-      int a, b, c, pa, pb, pc, p;
-
-      b = *pp++;
-      c = *cp++;
-      a = *lp++;
-
-      p = b - c;
-      pc = a - c;
-
-#ifdef PNG_USE_ABS
-      pa = abs(p);
-      pb = abs(pc);
-      pc = abs(p + pc);
-#else
-      pa = p < 0 ? -p : p;
-      pb = pc < 0 ? -pc : pc;
-      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-
-      p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
-
-      *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
-   }
-}
-#endif /* WRITE_FILTER */
-
-void /* PRIVATE */
-png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
-{
-#ifndef PNG_WRITE_FILTER_SUPPORTED
-   png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1);
-#else
-   unsigned int filter_to_do = png_ptr->do_filter;
-   png_bytep row_buf;
-   png_bytep best_row;
-   png_uint_32 bpp;
-   size_t mins;
-   size_t row_bytes = row_info->rowbytes;
-
-   png_debug(1, "in png_write_find_filter");
-
-   /* Find out how many bytes offset each pixel is */
-   bpp = (row_info->pixel_depth + 7) >> 3;
-
-   row_buf = png_ptr->row_buf;
-   mins = PNG_SIZE_MAX - 256/* so we can detect potential overflow of the
-                               running sum */;
-
-   /* The prediction method we use is to find which method provides the
-    * smallest value when summing the absolute values of the distances
-    * from zero, using anything >= 128 as negative numbers.  This is known
-    * as the "minimum sum of absolute differences" heuristic.  Other
-    * heuristics are the "weighted minimum sum of absolute differences"
-    * (experimental and can in theory improve compression), and the "zlib
-    * predictive" method (not implemented yet), which does test compressions
-    * of lines using different filter methods, and then chooses the
-    * (series of) filter(s) that give minimum compressed data size (VERY
-    * computationally expensive).
-    *
-    * GRR 980525:  consider also
-    *
-    *   (1) minimum sum of absolute differences from running average (i.e.,
-    *       keep running sum of non-absolute differences & count of bytes)
-    *       [track dispersion, too?  restart average if dispersion too large?]
-    *
-    *  (1b) minimum sum of absolute differences from sliding average, probably
-    *       with window size <= deflate window (usually 32K)
-    *
-    *   (2) minimum sum of squared differences from zero or running average
-    *       (i.e., ~ root-mean-square approach)
-    */
-
-
-   /* We don't need to test the 'no filter' case if this is the only filter
-    * that has been chosen, as it doesn't actually do anything to the data.
-    */
-   best_row = png_ptr->row_buf;
-
-   if (PNG_SIZE_MAX/128 <= row_bytes)
-   {
-      /* Overflow can occur in the calculation, just select the lowest set
-       * filter.
-       */
-      filter_to_do &= 0U-filter_to_do;
-   }
-   else if ((filter_to_do & PNG_FILTER_NONE) != 0 &&
-         filter_to_do != PNG_FILTER_NONE)
-   {
-      /* Overflow not possible and multiple filters in the list, including the
-       * 'none' filter.
-       */
-      png_bytep rp;
-      size_t sum = 0;
-      size_t i;
-      unsigned int v;
-
-      {
-         for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++)
-         {
-            v = *rp;
-#ifdef PNG_USE_ABS
-            sum += 128 - abs((int)v - 128);
-#else
-            sum += (v < 128) ? v : 256 - v;
-#endif
-         }
-      }
-
-      mins = sum;
-   }
-
-   /* Sub filter */
-   if (filter_to_do == PNG_FILTER_SUB)
-   /* It's the only filter so no testing is needed */
-   {
-      png_setup_sub_row_only(png_ptr, bpp, row_bytes);
-      best_row = png_ptr->try_row;
-   }
-
-   else if ((filter_to_do & PNG_FILTER_SUB) != 0)
-   {
-      size_t sum;
-      size_t lmins = mins;
-
-      sum = png_setup_sub_row(png_ptr, bpp, row_bytes, lmins);
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->try_row;
-         if (png_ptr->tst_row != NULL)
-         {
-            png_ptr->try_row = png_ptr->tst_row;
-            png_ptr->tst_row = best_row;
-         }
-      }
-   }
-
-   /* Up filter */
-   if (filter_to_do == PNG_FILTER_UP)
-   {
-      png_setup_up_row_only(png_ptr, row_bytes);
-      best_row = png_ptr->try_row;
-   }
-
-   else if ((filter_to_do & PNG_FILTER_UP) != 0)
-   {
-      size_t sum;
-      size_t lmins = mins;
-
-      sum = png_setup_up_row(png_ptr, row_bytes, lmins);
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->try_row;
-         if (png_ptr->tst_row != NULL)
-         {
-            png_ptr->try_row = png_ptr->tst_row;
-            png_ptr->tst_row = best_row;
-         }
-      }
-   }
-
-   /* Avg filter */
-   if (filter_to_do == PNG_FILTER_AVG)
-   {
-      png_setup_avg_row_only(png_ptr, bpp, row_bytes);
-      best_row = png_ptr->try_row;
-   }
-
-   else if ((filter_to_do & PNG_FILTER_AVG) != 0)
-   {
-      size_t sum;
-      size_t lmins = mins;
-
-      sum= png_setup_avg_row(png_ptr, bpp, row_bytes, lmins);
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->try_row;
-         if (png_ptr->tst_row != NULL)
-         {
-            png_ptr->try_row = png_ptr->tst_row;
-            png_ptr->tst_row = best_row;
-         }
-      }
-   }
-
-   /* Paeth filter */
-   if (filter_to_do == PNG_FILTER_PAETH)
-   {
-      png_setup_paeth_row_only(png_ptr, bpp, row_bytes);
-      best_row = png_ptr->try_row;
-   }
-
-   else if ((filter_to_do & PNG_FILTER_PAETH) != 0)
-   {
-      size_t sum;
-      size_t lmins = mins;
-
-      sum = png_setup_paeth_row(png_ptr, bpp, row_bytes, lmins);
-
-      if (sum < mins)
-      {
-         best_row = png_ptr->try_row;
-         if (png_ptr->tst_row != NULL)
-         {
-            png_ptr->try_row = png_ptr->tst_row;
-            png_ptr->tst_row = best_row;
-         }
-      }
-   }
-
-   /* Do the actual writing of the filtered row data from the chosen filter. */
-   png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1);
-
-#endif /* WRITE_FILTER */
-}
-
-
-/* Do the actual writing of a previously filtered row. */
-static void
-png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row,
-    size_t full_row_length/*includes filter byte*/)
-{
-   png_debug(1, "in png_write_filtered_row");
-
-   png_debug1(2, "filter = %d", filtered_row[0]);
-
-   png_compress_IDAT(png_ptr, filtered_row, full_row_length, Z_NO_FLUSH);
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   /* Swap the current and previous rows */
-   if (png_ptr->prev_row != NULL)
-   {
-      png_bytep tptr;
-
-      tptr = png_ptr->prev_row;
-      png_ptr->prev_row = png_ptr->row_buf;
-      png_ptr->row_buf = tptr;
-   }
-#endif /* WRITE_FILTER */
-
-   /* Finish row - updates counters and flushes zlib if last row */
-   png_write_finish_row(png_ptr);
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   png_ptr->flush_rows++;
-
-   if (png_ptr->flush_dist > 0 &&
-       png_ptr->flush_rows >= png_ptr->flush_dist)
-   {
-      png_write_flush(png_ptr);
-   }
-#endif /* WRITE_FLUSH */
-}
-#endif /* WRITE */
diff --git a/dep/zlib/CMakeLists.txt b/dep/zlib/CMakeLists.txt
deleted file mode 100644
index 10f512ccc..000000000
--- a/dep/zlib/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-add_library(zlib
-  include/zconf.h
-  include/zlib.h
-  src/adler32.c
-  src/compress.c
-  src/crc32.c
-  src/crc32.h
-  src/deflate.c
-  src/deflate.h
-  src/gzclose.c
-  src/gzguts.h
-  src/gzlib.c
-  src/gzread.c
-  src/gzwrite.c
-  src/infback.c
-  src/inffast.c
-  src/inffast.h
-  src/inffixed.h
-  src/inflate.c
-  src/inflate.h
-  src/inftrees.c
-  src/inftrees.h
-  src/trees.c
-  src/trees.h
-  src/uncompr.c
-  src/zutil.c
-  src/zutil.h
-)
-
-target_include_directories(zlib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-target_include_directories(zlib INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")
-disable_compiler_warnings_for_target(zlib)
-add_library(ZLIB::ZLIB ALIAS zlib)
diff --git a/dep/zlib/ChangeLog b/dep/zlib/ChangeLog
deleted file mode 100644
index 30199a65a..000000000
--- a/dep/zlib/ChangeLog
+++ /dev/null
@@ -1,1515 +0,0 @@
-
-                ChangeLog file for zlib
-
-Changes in 1.2.11 (15 Jan 2017)
-- Fix deflate stored bug when pulling last block from window
-- Permit immediate deflateParams changes before any deflate input
-
-Changes in 1.2.10 (2 Jan 2017)
-- Avoid warnings on snprintf() return value
-- Fix bug in deflate_stored() for zero-length input
-- Fix bug in gzwrite.c that produced corrupt gzip files
-- Remove files to be installed before copying them in Makefile.in
-- Add warnings when compiling with assembler code
-
-Changes in 1.2.9 (31 Dec 2016)
-- Fix contrib/minizip to permit unzipping with desktop API [Zouzou]
-- Improve contrib/blast to return unused bytes
-- Assure that gzoffset() is correct when appending
-- Improve compress() and uncompress() to support large lengths
-- Fix bug in test/example.c where error code not saved
-- Remedy Coverity warning [Randers-Pehrson]
-- Improve speed of gzprintf() in transparent mode
-- Fix inflateInit2() bug when windowBits is 16 or 32
-- Change DEBUG macro to ZLIB_DEBUG
-- Avoid uninitialized access by gzclose_w()
-- Allow building zlib outside of the source directory
-- Fix bug that accepted invalid zlib header when windowBits is zero
-- Fix gzseek() problem on MinGW due to buggy _lseeki64 there
-- Loop on write() calls in gzwrite.c in case of non-blocking I/O
-- Add --warn (-w) option to ./configure for more compiler warnings
-- Reject a window size of 256 bytes if not using the zlib wrapper
-- Fix bug when level 0 used with Z_HUFFMAN or Z_RLE
-- Add --debug (-d) option to ./configure to define ZLIB_DEBUG
-- Fix bugs in creating a very large gzip header
-- Add uncompress2() function, which returns the input size used
-- Assure that deflateParams() will not switch functions mid-block
-- Dramatically speed up deflation for level 0 (storing)
-- Add gzfread(), duplicating the interface of fread()
-- Add gzfwrite(), duplicating the interface of fwrite()
-- Add deflateGetDictionary() function
-- Use snprintf() for later versions of Microsoft C
-- Fix *Init macros to use z_ prefix when requested
-- Replace as400 with os400 for OS/400 support [Monnerat]
-- Add crc32_z() and adler32_z() functions with size_t lengths
-- Update Visual Studio project files [AraHaan]
-
-Changes in 1.2.8 (28 Apr 2013)
-- Update contrib/minizip/iowin32.c for Windows RT [Vollant]
-- Do not force Z_CONST for C++
-- Clean up contrib/vstudio [Roß]
-- Correct spelling error in zlib.h
-- Fix mixed line endings in contrib/vstudio
-
-Changes in 1.2.7.3 (13 Apr 2013)
-- Fix version numbers and DLL names in contrib/vstudio/*/zlib.rc
-
-Changes in 1.2.7.2 (13 Apr 2013)
-- Change check for a four-byte type back to hexadecimal
-- Fix typo in win32/Makefile.msc
-- Add casts in gzwrite.c for pointer differences
-
-Changes in 1.2.7.1 (24 Mar 2013)
-- Replace use of unsafe string functions with snprintf if available
-- Avoid including stddef.h on Windows for Z_SOLO compile [Niessink]
-- Fix gzgetc undefine when Z_PREFIX set [Turk]
-- Eliminate use of mktemp in Makefile (not always available)
-- Fix bug in 'F' mode for gzopen()
-- Add inflateGetDictionary() function
-- Correct comment in deflate.h
-- Use _snprintf for snprintf in Microsoft C
-- On Darwin, only use /usr/bin/libtool if libtool is not Apple
-- Delete "--version" file if created by "ar --version" [Richard G.]
-- Fix configure check for veracity of compiler error return codes
-- Fix CMake compilation of static lib for MSVC2010 x64
-- Remove unused variable in infback9.c
-- Fix argument checks in gzlog_compress() and gzlog_write()
-- Clean up the usage of z_const and respect const usage within zlib
-- Clean up examples/gzlog.[ch] comparisons of different types
-- Avoid shift equal to bits in type (caused endless loop)
-- Fix uninitialized value bug in gzputc() introduced by const patches
-- Fix memory allocation error in examples/zran.c [Nor]
-- Fix bug where gzopen(), gzclose() would write an empty file
-- Fix bug in gzclose() when gzwrite() runs out of memory
-- Check for input buffer malloc failure in examples/gzappend.c
-- Add note to contrib/blast to use binary mode in stdio
-- Fix comparisons of differently signed integers in contrib/blast
-- Check for invalid code length codes in contrib/puff
-- Fix serious but very rare decompression bug in inftrees.c
-- Update inflateBack() comments, since inflate() can be faster
-- Use underscored I/O function names for WINAPI_FAMILY
-- Add _tr_flush_bits to the external symbols prefixed by --zprefix
-- Add contrib/vstudio/vc10 pre-build step for static only
-- Quote --version-script argument in CMakeLists.txt
-- Don't specify --version-script on Apple platforms in CMakeLists.txt
-- Fix casting error in contrib/testzlib/testzlib.c
-- Fix types in contrib/minizip to match result of get_crc_table()
-- Simplify contrib/vstudio/vc10 with 'd' suffix
-- Add TOP support to win32/Makefile.msc
-- Suport i686 and amd64 assembler builds in CMakeLists.txt
-- Fix typos in the use of _LARGEFILE64_SOURCE in zconf.h
-- Add vc11 and vc12 build files to contrib/vstudio
-- Add gzvprintf() as an undocumented function in zlib
-- Fix configure for Sun shell
-- Remove runtime check in configure for four-byte integer type
-- Add casts and consts to ease user conversion to C++
-- Add man pages for minizip and miniunzip
-- In Makefile uninstall, don't rm if preceding cd fails
-- Do not return Z_BUF_ERROR if deflateParam() has nothing to write
-
-Changes in 1.2.7 (2 May 2012)
-- Replace use of memmove() with a simple copy for portability
-- Test for existence of strerror
-- Restore gzgetc_ for backward compatibility with 1.2.6
-- Fix build with non-GNU make on Solaris
-- Require gcc 4.0 or later on Mac OS X to use the hidden attribute
-- Include unistd.h for Watcom C
-- Use __WATCOMC__ instead of __WATCOM__
-- Do not use the visibility attribute if NO_VIZ defined
-- Improve the detection of no hidden visibility attribute
-- Avoid using __int64 for gcc or solo compilation
-- Cast to char * in gzprintf to avoid warnings [Zinser]
-- Fix make_vms.com for VAX [Zinser]
-- Don't use library or built-in byte swaps
-- Simplify test and use of gcc hidden attribute
-- Fix bug in gzclose_w() when gzwrite() fails to allocate memory
-- Add "x" (O_EXCL) and "e" (O_CLOEXEC) modes support to gzopen()
-- Fix bug in test/minigzip.c for configure --solo
-- Fix contrib/vstudio project link errors [Mohanathas]
-- Add ability to choose the builder in make_vms.com [Schweda]
-- Add DESTDIR support to mingw32 win32/Makefile.gcc
-- Fix comments in win32/Makefile.gcc for proper usage
-- Allow overriding the default install locations for cmake
-- Generate and install the pkg-config file with cmake
-- Build both a static and a shared version of zlib with cmake
-- Include version symbols for cmake builds
-- If using cmake with MSVC, add the source directory to the includes
-- Remove unneeded EXTRA_CFLAGS from win32/Makefile.gcc [Truta]
-- Move obsolete emx makefile to old [Truta]
-- Allow the use of -Wundef when compiling or using zlib
-- Avoid the use of the -u option with mktemp
-- Improve inflate() documentation on the use of Z_FINISH
-- Recognize clang as gcc
-- Add gzopen_w() in Windows for wide character path names
-- Rename zconf.h in CMakeLists.txt to move it out of the way
-- Add source directory in CMakeLists.txt for building examples
-- Look in build directory for zlib.pc in CMakeLists.txt
-- Remove gzflags from zlibvc.def in vc9 and vc10
-- Fix contrib/minizip compilation in the MinGW environment
-- Update ./configure for Solaris, support --64 [Mooney]
-- Remove -R. from Solaris shared build (possible security issue)
-- Avoid race condition for parallel make (-j) running example
-- Fix type mismatch between get_crc_table() and crc_table
-- Fix parsing of version with "-" in CMakeLists.txt [Snider, Ziegler]
-- Fix the path to zlib.map in CMakeLists.txt
-- Force the native libtool in Mac OS X to avoid GNU libtool [Beebe]
-- Add instructions to win32/Makefile.gcc for shared install [Torri]
-
-Changes in 1.2.6.1 (12 Feb 2012)
-- Avoid the use of the Objective-C reserved name "id"
-- Include io.h in gzguts.h for Microsoft compilers
-- Fix problem with ./configure --prefix and gzgetc macro
-- Include gz_header definition when compiling zlib solo
-- Put gzflags() functionality back in zutil.c
-- Avoid library header include in crc32.c for Z_SOLO
-- Use name in GCC_CLASSIC as C compiler for coverage testing, if set
-- Minor cleanup in contrib/minizip/zip.c [Vollant]
-- Update make_vms.com [Zinser]
-- Remove unnecessary gzgetc_ function
-- Use optimized byte swap operations for Microsoft and GNU [Snyder]
-- Fix minor typo in zlib.h comments [Rzesniowiecki]
-
-Changes in 1.2.6 (29 Jan 2012)
-- Update the Pascal interface in contrib/pascal
-- Fix function numbers for gzgetc_ in zlibvc.def files
-- Fix configure.ac for contrib/minizip [Schiffer]
-- Fix large-entry detection in minizip on 64-bit systems [Schiffer]
-- Have ./configure use the compiler return code for error indication
-- Fix CMakeLists.txt for cross compilation [McClure]
-- Fix contrib/minizip/zip.c for 64-bit architectures [Dalsnes]
-- Fix compilation of contrib/minizip on FreeBSD [Marquez]
-- Correct suggested usages in win32/Makefile.msc [Shachar, Horvath]
-- Include io.h for Turbo C / Borland C on all platforms [Truta]
-- Make version explicit in contrib/minizip/configure.ac [Bosmans]
-- Avoid warning for no encryption in contrib/minizip/zip.c [Vollant]
-- Minor cleanup up contrib/minizip/unzip.c [Vollant]
-- Fix bug when compiling minizip with C++ [Vollant]
-- Protect for long name and extra fields in contrib/minizip [Vollant]
-- Avoid some warnings in contrib/minizip [Vollant]
-- Add -I../.. -L../.. to CFLAGS for minizip and miniunzip
-- Add missing libs to minizip linker command
-- Add support for VPATH builds in contrib/minizip
-- Add an --enable-demos option to contrib/minizip/configure
-- Add the generation of configure.log by ./configure
-- Exit when required parameters not provided to win32/Makefile.gcc
-- Have gzputc return the character written instead of the argument
-- Use the -m option on ldconfig for BSD systems [Tobias]
-- Correct in zlib.map when deflateResetKeep was added
-
-Changes in 1.2.5.3 (15 Jan 2012)
-- Restore gzgetc function for binary compatibility
-- Do not use _lseeki64 under Borland C++ [Truta]
-- Update win32/Makefile.msc to build test/*.c [Truta]
-- Remove old/visualc6 given CMakefile and other alternatives
-- Update AS400 build files and documentation [Monnerat]
-- Update win32/Makefile.gcc to build test/*.c [Truta]
-- Permit stronger flushes after Z_BLOCK flushes
-- Avoid extraneous empty blocks when doing empty flushes
-- Permit Z_NULL arguments to deflatePending
-- Allow deflatePrime() to insert bits in the middle of a stream
-- Remove second empty static block for Z_PARTIAL_FLUSH
-- Write out all of the available bits when using Z_BLOCK
-- Insert the first two strings in the hash table after a flush
-
-Changes in 1.2.5.2 (17 Dec 2011)
-- fix ld error: unable to find version dependency 'ZLIB_1.2.5'
-- use relative symlinks for shared libs
-- Avoid searching past window for Z_RLE strategy
-- Assure that high-water mark initialization is always applied in deflate
-- Add assertions to fill_window() in deflate.c to match comments
-- Update python link in README
-- Correct spelling error in gzread.c
-- Fix bug in gzgets() for a concatenated empty gzip stream
-- Correct error in comment for gz_make()
-- Change gzread() and related to ignore junk after gzip streams
-- Allow gzread() and related to continue after gzclearerr()
-- Allow gzrewind() and gzseek() after a premature end-of-file
-- Simplify gzseek() now that raw after gzip is ignored
-- Change gzgetc() to a macro for speed (~40% speedup in testing)
-- Fix gzclose() to return the actual error last encountered
-- Always add large file support for windows
-- Include zconf.h for windows large file support
-- Include zconf.h.cmakein for windows large file support
-- Update zconf.h.cmakein on make distclean
-- Merge vestigial vsnprintf determination from zutil.h to gzguts.h
-- Clarify how gzopen() appends in zlib.h comments
-- Correct documentation of gzdirect() since junk at end now ignored
-- Add a transparent write mode to gzopen() when 'T' is in the mode
-- Update python link in zlib man page
-- Get inffixed.h and MAKEFIXED result to match
-- Add a ./config --solo option to make zlib subset with no library use
-- Add undocumented inflateResetKeep() function for CAB file decoding
-- Add --cover option to ./configure for gcc coverage testing
-- Add #define ZLIB_CONST option to use const in the z_stream interface
-- Add comment to gzdopen() in zlib.h to use dup() when using fileno()
-- Note behavior of uncompress() to provide as much data as it can
-- Add files in contrib/minizip to aid in building libminizip
-- Split off AR options in Makefile.in and configure
-- Change ON macro to Z_ARG to avoid application conflicts
-- Facilitate compilation with Borland C++ for pragmas and vsnprintf
-- Include io.h for Turbo C / Borland C++
-- Move example.c and minigzip.c to test/
-- Simplify incomplete code table filling in inflate_table()
-- Remove code from inflate.c and infback.c that is impossible to execute
-- Test the inflate code with full coverage
-- Allow deflateSetDictionary, inflateSetDictionary at any time (in raw)
-- Add deflateResetKeep and fix inflateResetKeep to retain dictionary
-- Fix gzwrite.c to accommodate reduced memory zlib compilation
-- Have inflate() with Z_FINISH avoid the allocation of a window
-- Do not set strm->adler when doing raw inflate
-- Fix gzeof() to behave just like feof() when read is not past end of file
-- Fix bug in gzread.c when end-of-file is reached
-- Avoid use of Z_BUF_ERROR in gz* functions except for premature EOF
-- Document gzread() capability to read concurrently written files
-- Remove hard-coding of resource compiler in CMakeLists.txt [Blammo]
-
-Changes in 1.2.5.1 (10 Sep 2011)
-- Update FAQ entry on shared builds (#13)
-- Avoid symbolic argument to chmod in Makefile.in
-- Fix bug and add consts in contrib/puff [Oberhumer]
-- Update contrib/puff/zeros.raw test file to have all block types
-- Add full coverage test for puff in contrib/puff/Makefile
-- Fix static-only-build install in Makefile.in
-- Fix bug in unzGetCurrentFileInfo() in contrib/minizip [Kuno]
-- Add libz.a dependency to shared in Makefile.in for parallel builds
-- Spell out "number" (instead of "nb") in zlib.h for total_in, total_out
-- Replace $(...) with `...` in configure for non-bash sh [Bowler]
-- Add darwin* to Darwin* and solaris* to SunOS\ 5* in configure [Groffen]
-- Add solaris* to Linux* in configure to allow gcc use [Groffen]
-- Add *bsd* to Linux* case in configure [Bar-Lev]
-- Add inffast.obj to dependencies in win32/Makefile.msc
-- Correct spelling error in deflate.h [Kohler]
-- Change libzdll.a again to libz.dll.a (!) in win32/Makefile.gcc
-- Add test to configure for GNU C looking for gcc in output of $cc -v
-- Add zlib.pc generation to win32/Makefile.gcc [Weigelt]
-- Fix bug in zlib.h for _FILE_OFFSET_BITS set and _LARGEFILE64_SOURCE not
-- Add comment in zlib.h that adler32_combine with len2 < 0 makes no sense
-- Make NO_DIVIDE option in adler32.c much faster (thanks to John Reiser)
-- Make stronger test in zconf.h to include unistd.h for LFS
-- Apply Darwin patches for 64-bit file offsets to contrib/minizip [Slack]
-- Fix zlib.h LFS support when Z_PREFIX used
-- Add updated as400 support (removed from old) [Monnerat]
-- Avoid deflate sensitivity to volatile input data
-- Avoid division in adler32_combine for NO_DIVIDE
-- Clarify the use of Z_FINISH with deflateBound() amount of space
-- Set binary for output file in puff.c
-- Use u4 type for crc_table to avoid conversion warnings
-- Apply casts in zlib.h to avoid conversion warnings
-- Add OF to prototypes for adler32_combine_ and crc32_combine_ [Miller]
-- Improve inflateSync() documentation to note indeterminancy
-- Add deflatePending() function to return the amount of pending output
-- Correct the spelling of "specification" in FAQ [Randers-Pehrson]
-- Add a check in configure for stdarg.h, use for gzprintf()
-- Check that pointers fit in ints when gzprint() compiled old style
-- Add dummy name before $(SHAREDLIBV) in Makefile [Bar-Lev, Bowler]
-- Delete line in configure that adds -L. libz.a to LDFLAGS [Weigelt]
-- Add debug records in assmebler code [Londer]
-- Update RFC references to use http://tools.ietf.org/html/... [Li]
-- Add --archs option, use of libtool to configure for Mac OS X [Borstel]
-
-Changes in 1.2.5 (19 Apr 2010)
-- Disable visibility attribute in win32/Makefile.gcc [Bar-Lev]
-- Default to libdir as sharedlibdir in configure [Nieder]
-- Update copyright dates on modified source files
-- Update trees.c to be able to generate modified trees.h
-- Exit configure for MinGW, suggesting win32/Makefile.gcc
-- Check for NULL path in gz_open [Homurlu]
-
-Changes in 1.2.4.5 (18 Apr 2010)
-- Set sharedlibdir in configure [Torok]
-- Set LDFLAGS in Makefile.in [Bar-Lev]
-- Avoid mkdir objs race condition in Makefile.in [Bowler]
-- Add ZLIB_INTERNAL in front of internal inter-module functions and arrays
-- Define ZLIB_INTERNAL to hide internal functions and arrays for GNU C
-- Don't use hidden attribute when it is a warning generator (e.g. Solaris)
-
-Changes in 1.2.4.4 (18 Apr 2010)
-- Fix CROSS_PREFIX executable testing, CHOST extract, mingw* [Torok]
-- Undefine _LARGEFILE64_SOURCE in zconf.h if it is zero, but not if empty
-- Try to use bash or ksh regardless of functionality of /bin/sh
-- Fix configure incompatibility with NetBSD sh
-- Remove attempt to run under bash or ksh since have better NetBSD fix
-- Fix win32/Makefile.gcc for MinGW [Bar-Lev]
-- Add diagnostic messages when using CROSS_PREFIX in configure
-- Added --sharedlibdir option to configure [Weigelt]
-- Use hidden visibility attribute when available [Frysinger]
-
-Changes in 1.2.4.3 (10 Apr 2010)
-- Only use CROSS_PREFIX in configure for ar and ranlib if they exist
-- Use CROSS_PREFIX for nm [Bar-Lev]
-- Assume _LARGEFILE64_SOURCE defined is equivalent to true
-- Avoid use of undefined symbols in #if with && and ||
-- Make *64 prototypes in gzguts.h consistent with functions
-- Add -shared load option for MinGW in configure [Bowler]
-- Move z_off64_t to public interface, use instead of off64_t
-- Remove ! from shell test in configure (not portable to Solaris)
-- Change +0 macro tests to -0 for possibly increased portability
-
-Changes in 1.2.4.2 (9 Apr 2010)
-- Add consistent carriage returns to readme.txt's in masmx86 and masmx64
-- Really provide prototypes for *64 functions when building without LFS
-- Only define unlink() in minigzip.c if unistd.h not included
-- Update README to point to contrib/vstudio project files
-- Move projects/vc6 to old/ and remove projects/
-- Include stdlib.h in minigzip.c for setmode() definition under WinCE
-- Clean up assembler builds in win32/Makefile.msc [Rowe]
-- Include sys/types.h for Microsoft for off_t definition
-- Fix memory leak on error in gz_open()
-- Symbolize nm as $NM in configure [Weigelt]
-- Use TEST_LDSHARED instead of LDSHARED to link test programs [Weigelt]
-- Add +0 to _FILE_OFFSET_BITS and _LFS64_LARGEFILE in case not defined
-- Fix bug in gzeof() to take into account unused input data
-- Avoid initialization of structures with variables in puff.c
-- Updated win32/README-WIN32.txt [Rowe]
-
-Changes in 1.2.4.1 (28 Mar 2010)
-- Remove the use of [a-z] constructs for sed in configure [gentoo 310225]
-- Remove $(SHAREDLIB) from LIBS in Makefile.in [Creech]
-- Restore "for debugging" comment on sprintf() in gzlib.c
-- Remove fdopen for MVS from gzguts.h
-- Put new README-WIN32.txt in win32 [Rowe]
-- Add check for shell to configure and invoke another shell if needed
-- Fix big fat stinking bug in gzseek() on uncompressed files
-- Remove vestigial F_OPEN64 define in zutil.h
-- Set and check the value of _LARGEFILE_SOURCE and _LARGEFILE64_SOURCE
-- Avoid errors on non-LFS systems when applications define LFS macros
-- Set EXE to ".exe" in configure for MINGW [Kahle]
-- Match crc32() in crc32.c exactly to the prototype in zlib.h [Sherrill]
-- Add prefix for cross-compilation in win32/makefile.gcc [Bar-Lev]
-- Add DLL install in win32/makefile.gcc [Bar-Lev]
-- Allow Linux* or linux* from uname in configure [Bar-Lev]
-- Allow ldconfig to be redefined in configure and Makefile.in [Bar-Lev]
-- Add cross-compilation prefixes to configure [Bar-Lev]
-- Match type exactly in gz_load() invocation in gzread.c
-- Match type exactly of zcalloc() in zutil.c to zlib.h alloc_func
-- Provide prototypes for *64 functions when building zlib without LFS
-- Don't use -lc when linking shared library on MinGW
-- Remove errno.h check in configure and vestigial errno code in zutil.h
-
-Changes in 1.2.4 (14 Mar 2010)
-- Fix VER3 extraction in configure for no fourth subversion
-- Update zlib.3, add docs to Makefile.in to make .pdf out of it
-- Add zlib.3.pdf to distribution
-- Don't set error code in gzerror() if passed pointer is NULL
-- Apply destination directory fixes to CMakeLists.txt [Lowman]
-- Move #cmakedefine's to a new zconf.in.cmakein
-- Restore zconf.h for builds that don't use configure or cmake
-- Add distclean to dummy Makefile for convenience
-- Update and improve INDEX, README, and FAQ
-- Update CMakeLists.txt for the return of zconf.h [Lowman]
-- Update contrib/vstudio/vc9 and vc10 [Vollant]
-- Change libz.dll.a back to libzdll.a in win32/Makefile.gcc
-- Apply license and readme changes to contrib/asm686 [Raiter]
-- Check file name lengths and add -c option in minigzip.c [Li]
-- Update contrib/amd64 and contrib/masmx86/ [Vollant]
-- Avoid use of "eof" parameter in trees.c to not shadow library variable
-- Update make_vms.com for removal of zlibdefs.h [Zinser]
-- Update assembler code and vstudio projects in contrib [Vollant]
-- Remove outdated assembler code contrib/masm686 and contrib/asm586
-- Remove old vc7 and vc8 from contrib/vstudio
-- Update win32/Makefile.msc, add ZLIB_VER_SUBREVISION [Rowe]
-- Fix memory leaks in gzclose_r() and gzclose_w(), file leak in gz_open()
-- Add contrib/gcc_gvmat64 for longest_match and inflate_fast [Vollant]
-- Remove *64 functions from win32/zlib.def (they're not 64-bit yet)
-- Fix bug in void-returning vsprintf() case in gzwrite.c
-- Fix name change from inflate.h in contrib/inflate86/inffas86.c
-- Check if temporary file exists before removing in make_vms.com [Zinser]
-- Fix make install and uninstall for --static option
-- Fix usage of _MSC_VER in gzguts.h and zutil.h [Truta]
-- Update readme.txt in contrib/masmx64 and masmx86 to assemble
-
-Changes in 1.2.3.9 (21 Feb 2010)
-- Expunge gzio.c
-- Move as400 build information to old
-- Fix updates in contrib/minizip and contrib/vstudio
-- Add const to vsnprintf test in configure to avoid warnings [Weigelt]
-- Delete zconf.h (made by configure) [Weigelt]
-- Change zconf.in.h to zconf.h.in per convention [Weigelt]
-- Check for NULL buf in gzgets()
-- Return empty string for gzgets() with len == 1 (like fgets())
-- Fix description of gzgets() in zlib.h for end-of-file, NULL return
-- Update minizip to 1.1 [Vollant]
-- Avoid MSVC loss of data warnings in gzread.c, gzwrite.c
-- Note in zlib.h that gzerror() should be used to distinguish from EOF
-- Remove use of snprintf() from gzlib.c
-- Fix bug in gzseek()
-- Update contrib/vstudio, adding vc9 and vc10 [Kuno, Vollant]
-- Fix zconf.h generation in CMakeLists.txt [Lowman]
-- Improve comments in zconf.h where modified by configure
-
-Changes in 1.2.3.8 (13 Feb 2010)
-- Clean up text files (tabs, trailing whitespace, etc.) [Oberhumer]
-- Use z_off64_t in gz_zero() and gz_skip() to match state->skip
-- Avoid comparison problem when sizeof(int) == sizeof(z_off64_t)
-- Revert to Makefile.in from 1.2.3.6 (live with the clutter)
-- Fix missing error return in gzflush(), add zlib.h note
-- Add *64 functions to zlib.map [Levin]
-- Fix signed/unsigned comparison in gz_comp()
-- Use SFLAGS when testing shared linking in configure
-- Add --64 option to ./configure to use -m64 with gcc
-- Fix ./configure --help to correctly name options
-- Have make fail if a test fails [Levin]
-- Avoid buffer overrun in contrib/masmx64/gvmat64.asm [Simpson]
-- Remove assembler object files from contrib
-
-Changes in 1.2.3.7 (24 Jan 2010)
-- Always gzopen() with O_LARGEFILE if available
-- Fix gzdirect() to work immediately after gzopen() or gzdopen()
-- Make gzdirect() more precise when the state changes while reading
-- Improve zlib.h documentation in many places
-- Catch memory allocation failure in gz_open()
-- Complete close operation if seek forward in gzclose_w() fails
-- Return Z_ERRNO from gzclose_r() if close() fails
-- Return Z_STREAM_ERROR instead of EOF for gzclose() being passed NULL
-- Return zero for gzwrite() errors to match zlib.h description
-- Return -1 on gzputs() error to match zlib.h description
-- Add zconf.in.h to allow recovery from configure modification [Weigelt]
-- Fix static library permissions in Makefile.in [Weigelt]
-- Avoid warnings in configure tests that hide functionality [Weigelt]
-- Add *BSD and DragonFly to Linux case in configure [gentoo 123571]
-- Change libzdll.a to libz.dll.a in win32/Makefile.gcc [gentoo 288212]
-- Avoid access of uninitialized data for first inflateReset2 call [Gomes]
-- Keep object files in subdirectories to reduce the clutter somewhat
-- Remove default Makefile and zlibdefs.h, add dummy Makefile
-- Add new external functions to Z_PREFIX, remove duplicates, z_z_ -> z_
-- Remove zlibdefs.h completely -- modify zconf.h instead
-
-Changes in 1.2.3.6 (17 Jan 2010)
-- Avoid void * arithmetic in gzread.c and gzwrite.c
-- Make compilers happier with const char * for gz_error message
-- Avoid unused parameter warning in inflate.c
-- Avoid signed-unsigned comparison warning in inflate.c
-- Indent #pragma's for traditional C
-- Fix usage of strwinerror() in glib.c, change to gz_strwinerror()
-- Correct email address in configure for system options
-- Update make_vms.com and add make_vms.com to contrib/minizip [Zinser]
-- Update zlib.map [Brown]
-- Fix Makefile.in for Solaris 10 make of example64 and minizip64 [Torok]
-- Apply various fixes to CMakeLists.txt [Lowman]
-- Add checks on len in gzread() and gzwrite()
-- Add error message for no more room for gzungetc()
-- Remove zlib version check in gzwrite()
-- Defer compression of gzprintf() result until need to
-- Use snprintf() in gzdopen() if available
-- Remove USE_MMAP configuration determination (only used by minigzip)
-- Remove examples/pigz.c (available separately)
-- Update examples/gun.c to 1.6
-
-Changes in 1.2.3.5 (8 Jan 2010)
-- Add space after #if in zutil.h for some compilers
-- Fix relatively harmless bug in deflate_fast() [Exarevsky]
-- Fix same problem in deflate_slow()
-- Add $(SHAREDLIBV) to LIBS in Makefile.in [Brown]
-- Add deflate_rle() for faster Z_RLE strategy run-length encoding
-- Add deflate_huff() for faster Z_HUFFMAN_ONLY encoding
-- Change name of "write" variable in inffast.c to avoid library collisions
-- Fix premature EOF from gzread() in gzio.c [Brown]
-- Use zlib header window size if windowBits is 0 in inflateInit2()
-- Remove compressBound() call in deflate.c to avoid linking compress.o
-- Replace use of errno in gz* with functions, support WinCE [Alves]
-- Provide alternative to perror() in minigzip.c for WinCE [Alves]
-- Don't use _vsnprintf on later versions of MSVC [Lowman]
-- Add CMake build script and input file [Lowman]
-- Update contrib/minizip to 1.1 [Svensson, Vollant]
-- Moved nintendods directory from contrib to .
-- Replace gzio.c with a new set of routines with the same functionality
-- Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above
-- Update contrib/minizip to 1.1b
-- Change gzeof() to return 0 on error instead of -1 to agree with zlib.h
-
-Changes in 1.2.3.4 (21 Dec 2009)
-- Use old school .SUFFIXES in Makefile.in for FreeBSD compatibility
-- Update comments in configure and Makefile.in for default --shared
-- Fix test -z's in configure [Marquess]
-- Build examplesh and minigzipsh when not testing
-- Change NULL's to Z_NULL's in deflate.c and in comments in zlib.h
-- Import LDFLAGS from the environment in configure
-- Fix configure to populate SFLAGS with discovered CFLAGS options
-- Adapt make_vms.com to the new Makefile.in [Zinser]
-- Add zlib2ansi script for C++ compilation [Marquess]
-- Add _FILE_OFFSET_BITS=64 test to make test (when applicable)
-- Add AMD64 assembler code for longest match to contrib [Teterin]
-- Include options from $SFLAGS when doing $LDSHARED
-- Simplify 64-bit file support by introducing z_off64_t type
-- Make shared object files in objs directory to work around old Sun cc
-- Use only three-part version number for Darwin shared compiles
-- Add rc option to ar in Makefile.in for when ./configure not run
-- Add -WI,-rpath,. to LDFLAGS for OSF 1 V4*
-- Set LD_LIBRARYN32_PATH for SGI IRIX shared compile
-- Protect against _FILE_OFFSET_BITS being defined when compiling zlib
-- Rename Makefile.in targets allstatic to static and allshared to shared
-- Fix static and shared Makefile.in targets to be independent
-- Correct error return bug in gz_open() by setting state [Brown]
-- Put spaces before ;;'s in configure for better sh compatibility
-- Add pigz.c (parallel implementation of gzip) to examples/
-- Correct constant in crc32.c to UL [Leventhal]
-- Reject negative lengths in crc32_combine()
-- Add inflateReset2() function to work like inflateEnd()/inflateInit2()
-- Include sys/types.h for _LARGEFILE64_SOURCE [Brown]
-- Correct typo in doc/algorithm.txt [Janik]
-- Fix bug in adler32_combine() [Zhu]
-- Catch missing-end-of-block-code error in all inflates and in puff
-    Assures that random input to inflate eventually results in an error
-- Added enough.c (calculation of ENOUGH for inftrees.h) to examples/
-- Update ENOUGH and its usage to reflect discovered bounds
-- Fix gzerror() error report on empty input file [Brown]
-- Add ush casts in trees.c to avoid pedantic runtime errors
-- Fix typo in zlib.h uncompress() description [Reiss]
-- Correct inflate() comments with regard to automatic header detection
-- Remove deprecation comment on Z_PARTIAL_FLUSH (it stays)
-- Put new version of gzlog (2.0) in examples with interruption recovery
-- Add puff compile option to permit invalid distance-too-far streams
-- Add puff TEST command options, ability to read piped input
-- Prototype the *64 functions in zlib.h when _FILE_OFFSET_BITS == 64, but
-  _LARGEFILE64_SOURCE not defined
-- Fix Z_FULL_FLUSH to truly erase the past by resetting s->strstart
-- Fix deflateSetDictionary() to use all 32K for output consistency
-- Remove extraneous #define MIN_LOOKAHEAD in deflate.c (in deflate.h)
-- Clear bytes after deflate lookahead to avoid use of uninitialized data
-- Change a limit in inftrees.c to be more transparent to Coverity Prevent
-- Update win32/zlib.def with exported symbols from zlib.h
-- Correct spelling errors in zlib.h [Willem, Sobrado]
-- Allow Z_BLOCK for deflate() to force a new block
-- Allow negative bits in inflatePrime() to delete existing bit buffer
-- Add Z_TREES flush option to inflate() to return at end of trees
-- Add inflateMark() to return current state information for random access
-- Add Makefile for NintendoDS to contrib [Costa]
-- Add -w in configure compile tests to avoid spurious warnings [Beucler]
-- Fix typos in zlib.h comments for deflateSetDictionary()
-- Fix EOF detection in transparent gzread() [Maier]
-
-Changes in 1.2.3.3 (2 October 2006)
-- Make --shared the default for configure, add a --static option
-- Add compile option to permit invalid distance-too-far streams
-- Add inflateUndermine() function which is required to enable above
-- Remove use of "this" variable name for C++ compatibility [Marquess]
-- Add testing of shared library in make test, if shared library built
-- Use ftello() and fseeko() if available instead of ftell() and fseek()
-- Provide two versions of all functions that use the z_off_t type for
-  binary compatibility -- a normal version and a 64-bit offset version,
-  per the Large File Support Extension when _LARGEFILE64_SOURCE is
-  defined; use the 64-bit versions by default when _FILE_OFFSET_BITS
-  is defined to be 64
-- Add a --uname= option to configure to perhaps help with cross-compiling
-
-Changes in 1.2.3.2 (3 September 2006)
-- Turn off silly Borland warnings [Hay]
-- Use off64_t and define _LARGEFILE64_SOURCE when present
-- Fix missing dependency on inffixed.h in Makefile.in
-- Rig configure --shared to build both shared and static [Teredesai, Truta]
-- Remove zconf.in.h and instead create a new zlibdefs.h file
-- Fix contrib/minizip/unzip.c non-encrypted after encrypted [Vollant]
-- Add treebuild.xml (see http://treebuild.metux.de/) [Weigelt]
-
-Changes in 1.2.3.1 (16 August 2006)
-- Add watcom directory with OpenWatcom make files [Daniel]
-- Remove #undef of FAR in zconf.in.h for MVS [Fedtke]
-- Update make_vms.com [Zinser]
-- Use -fPIC for shared build in configure [Teredesai, Nicholson]
-- Use only major version number for libz.so on IRIX and OSF1 [Reinholdtsen]
-- Use fdopen() (not _fdopen()) for Interix in zutil.h [Bäck]
-- Add some FAQ entries about the contrib directory
-- Update the MVS question in the FAQ
-- Avoid extraneous reads after EOF in gzio.c [Brown]
-- Correct spelling of "successfully" in gzio.c [Randers-Pehrson]
-- Add comments to zlib.h about gzerror() usage [Brown]
-- Set extra flags in gzip header in gzopen() like deflate() does
-- Make configure options more compatible with double-dash conventions
-  [Weigelt]
-- Clean up compilation under Solaris SunStudio cc [Rowe, Reinholdtsen]
-- Fix uninstall target in Makefile.in [Truta]
-- Add pkgconfig support [Weigelt]
-- Use $(DESTDIR) macro in Makefile.in [Reinholdtsen, Weigelt]
-- Replace set_data_type() with a more accurate detect_data_type() in
-  trees.c, according to the txtvsbin.txt document [Truta]
-- Swap the order of #include <stdio.h> and #include "zlib.h" in
-  gzio.c, example.c and minigzip.c [Truta]
-- Shut up annoying VS2005 warnings about standard C deprecation [Rowe,
-  Truta] (where?)
-- Fix target "clean" from win32/Makefile.bor [Truta]
-- Create .pdb and .manifest files in win32/makefile.msc [Ziegler, Rowe]
-- Update zlib www home address in win32/DLL_FAQ.txt [Truta]
-- Update contrib/masmx86/inffas32.asm for VS2005 [Vollant, Van Wassenhove]
-- Enable browse info in the "Debug" and "ASM Debug" configurations in
-  the Visual C++ 6 project, and set (non-ASM) "Debug" as default [Truta]
-- Add pkgconfig support [Weigelt]
-- Add ZLIB_VER_MAJOR, ZLIB_VER_MINOR and ZLIB_VER_REVISION in zlib.h,
-  for use in win32/zlib1.rc [Polushin, Rowe, Truta]
-- Add a document that explains the new text detection scheme to
-  doc/txtvsbin.txt [Truta]
-- Add rfc1950.txt, rfc1951.txt and rfc1952.txt to doc/ [Truta]
-- Move algorithm.txt into doc/ [Truta]
-- Synchronize FAQ with website
-- Fix compressBound(), was low for some pathological cases [Fearnley]
-- Take into account wrapper variations in deflateBound()
-- Set examples/zpipe.c input and output to binary mode for Windows
-- Update examples/zlib_how.html with new zpipe.c (also web site)
-- Fix some warnings in examples/gzlog.c and examples/zran.c (it seems
-  that gcc became pickier in 4.0)
-- Add zlib.map for Linux: "All symbols from zlib-1.1.4 remain
-  un-versioned, the patch adds versioning only for symbols introduced in
-  zlib-1.2.0 or later.  It also declares as local those symbols which are
-  not designed to be exported." [Levin]
-- Update Z_PREFIX list in zconf.in.h, add --zprefix option to configure
-- Do not initialize global static by default in trees.c, add a response
-  NO_INIT_GLOBAL_POINTERS to initialize them if needed [Marquess]
-- Don't use strerror() in gzio.c under WinCE [Yakimov]
-- Don't use errno.h in zutil.h under WinCE [Yakimov]
-- Move arguments for AR to its usage to allow replacing ar [Marot]
-- Add HAVE_VISIBILITY_PRAGMA in zconf.in.h for Mozilla [Randers-Pehrson]
-- Improve inflateInit() and inflateInit2() documentation
-- Fix structure size comment in inflate.h
-- Change configure help option from --h* to --help [Santos]
-
-Changes in 1.2.3 (18 July 2005)
-- Apply security vulnerability fixes to contrib/infback9 as well
-- Clean up some text files (carriage returns, trailing space)
-- Update testzlib, vstudio, masmx64, and masmx86 in contrib [Vollant]
-
-Changes in 1.2.2.4 (11 July 2005)
-- Add inflatePrime() function for starting inflation at bit boundary
-- Avoid some Visual C warnings in deflate.c
-- Avoid more silly Visual C warnings in inflate.c and inftrees.c for 64-bit
-  compile
-- Fix some spelling errors in comments [Betts]
-- Correct inflateInit2() error return documentation in zlib.h
-- Add zran.c example of compressed data random access to examples
-  directory, shows use of inflatePrime()
-- Fix cast for assignments to strm->state in inflate.c and infback.c
-- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer]
-- Move declarations of gf2 functions to right place in crc32.c [Oberhumer]
-- Add cast in trees.c t avoid a warning [Oberhumer]
-- Avoid some warnings in fitblk.c, gun.c, gzjoin.c in examples [Oberhumer]
-- Update make_vms.com [Zinser]
-- Initialize state->write in inflateReset() since copied in inflate_fast()
-- Be more strict on incomplete code sets in inflate_table() and increase
-  ENOUGH and MAXD -- this repairs a possible security vulnerability for
-  invalid inflate input.  Thanks to Tavis Ormandy and Markus Oberhumer for
-  discovering the vulnerability and providing test cases.
-- Add ia64 support to configure for HP-UX [Smith]
-- Add error return to gzread() for format or i/o error [Levin]
-- Use malloc.h for OS/2 [Necasek]
-
-Changes in 1.2.2.3 (27 May 2005)
-- Replace 1U constants in inflate.c and inftrees.c for 64-bit compile
-- Typecast fread() return values in gzio.c [Vollant]
-- Remove trailing space in minigzip.c outmode (VC++ can't deal with it)
-- Fix crc check bug in gzread() after gzungetc() [Heiner]
-- Add the deflateTune() function to adjust internal compression parameters
-- Add a fast gzip decompressor, gun.c, to examples (use of inflateBack)
-- Remove an incorrect assertion in examples/zpipe.c
-- Add C++ wrapper in infback9.h [Donais]
-- Fix bug in inflateCopy() when decoding fixed codes
-- Note in zlib.h how much deflateSetDictionary() actually uses
-- Remove USE_DICT_HEAD in deflate.c (would mess up inflate if used)
-- Add _WIN32_WCE to define WIN32 in zconf.in.h [Spencer]
-- Don't include stderr.h or errno.h for _WIN32_WCE in zutil.h [Spencer]
-- Add gzdirect() function to indicate transparent reads
-- Update contrib/minizip [Vollant]
-- Fix compilation of deflate.c when both ASMV and FASTEST [Oberhumer]
-- Add casts in crc32.c to avoid warnings [Oberhumer]
-- Add contrib/masmx64 [Vollant]
-- Update contrib/asm586, asm686, masmx86, testzlib, vstudio [Vollant]
-
-Changes in 1.2.2.2 (30 December 2004)
-- Replace structure assignments in deflate.c and inflate.c with zmemcpy to
-  avoid implicit memcpy calls (portability for no-library compilation)
-- Increase sprintf() buffer size in gzdopen() to allow for large numbers
-- Add INFLATE_STRICT to check distances against zlib header
-- Improve WinCE errno handling and comments [Chang]
-- Remove comment about no gzip header processing in FAQ
-- Add Z_FIXED strategy option to deflateInit2() to force fixed trees
-- Add updated make_vms.com [Coghlan], update README
-- Create a new "examples" directory, move gzappend.c there, add zpipe.c,
-  fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html.
-- Add FAQ entry and comments in deflate.c on uninitialized memory access
-- Add Solaris 9 make options in configure [Gilbert]
-- Allow strerror() usage in gzio.c for STDC
-- Fix DecompressBuf in contrib/delphi/ZLib.pas [ManChesTer]
-- Update contrib/masmx86/inffas32.asm and gvmat32.asm [Vollant]
-- Use z_off_t for adler32_combine() and crc32_combine() lengths
-- Make adler32() much faster for small len
-- Use OS_CODE in deflate() default gzip header
-
-Changes in 1.2.2.1 (31 October 2004)
-- Allow inflateSetDictionary() call for raw inflate
-- Fix inflate header crc check bug for file names and comments
-- Add deflateSetHeader() and gz_header structure for custom gzip headers
-- Add inflateGetheader() to retrieve gzip headers
-- Add crc32_combine() and adler32_combine() functions
-- Add alloc_func, free_func, in_func, out_func to Z_PREFIX list
-- Use zstreamp consistently in zlib.h (inflate_back functions)
-- Remove GUNZIP condition from definition of inflate_mode in inflate.h
-  and in contrib/inflate86/inffast.S [Truta, Anderson]
-- Add support for AMD64 in contrib/inflate86/inffas86.c [Anderson]
-- Update projects/README.projects and projects/visualc6 [Truta]
-- Update win32/DLL_FAQ.txt [Truta]
-- Avoid warning under NO_GZCOMPRESS in gzio.c; fix typo [Truta]
-- Deprecate Z_ASCII; use Z_TEXT instead [Truta]
-- Use a new algorithm for setting strm->data_type in trees.c [Truta]
-- Do not define an exit() prototype in zutil.c unless DEBUG defined
-- Remove prototype of exit() from zutil.c, example.c, minigzip.c [Truta]
-- Add comment in zlib.h for Z_NO_FLUSH parameter to deflate()
-- Fix Darwin build version identification [Peterson]
-
-Changes in 1.2.2 (3 October 2004)
-- Update zlib.h comments on gzip in-memory processing
-- Set adler to 1 in inflateReset() to support Java test suite [Walles]
-- Add contrib/dotzlib [Ravn]
-- Update win32/DLL_FAQ.txt [Truta]
-- Update contrib/minizip [Vollant]
-- Move contrib/visual-basic.txt to old/ [Truta]
-- Fix assembler builds in projects/visualc6/ [Truta]
-
-Changes in 1.2.1.2 (9 September 2004)
-- Update INDEX file
-- Fix trees.c to update strm->data_type (no one ever noticed!)
-- Fix bug in error case in inflate.c, infback.c, and infback9.c [Brown]
-- Add "volatile" to crc table flag declaration (for DYNAMIC_CRC_TABLE)
-- Add limited multitasking protection to DYNAMIC_CRC_TABLE
-- Add NO_vsnprintf for VMS in zutil.h [Mozilla]
-- Don't declare strerror() under VMS [Mozilla]
-- Add comment to DYNAMIC_CRC_TABLE to use get_crc_table() to initialize
-- Update contrib/ada [Anisimkov]
-- Update contrib/minizip [Vollant]
-- Fix configure to not hardcode directories for Darwin [Peterson]
-- Fix gzio.c to not return error on empty files [Brown]
-- Fix indentation; update version in contrib/delphi/ZLib.pas and
-  contrib/pascal/zlibpas.pas [Truta]
-- Update mkasm.bat in contrib/masmx86 [Truta]
-- Update contrib/untgz [Truta]
-- Add projects/README.projects [Truta]
-- Add project for MS Visual C++ 6.0 in projects/visualc6 [Cadieux, Truta]
-- Update win32/DLL_FAQ.txt [Truta]
-- Update list of Z_PREFIX symbols in zconf.h [Randers-Pehrson, Truta]
-- Remove an unnecessary assignment to curr in inftrees.c [Truta]
-- Add OS/2 to exe builds in configure [Poltorak]
-- Remove err dummy parameter in zlib.h [Kientzle]
-
-Changes in 1.2.1.1 (9 January 2004)
-- Update email address in README
-- Several FAQ updates
-- Fix a big fat bug in inftrees.c that prevented decoding valid
-  dynamic blocks with only literals and no distance codes --
-  Thanks to "Hot Emu" for the bug report and sample file
-- Add a note to puff.c on no distance codes case.
-
-Changes in 1.2.1 (17 November 2003)
-- Remove a tab in contrib/gzappend/gzappend.c
-- Update some interfaces in contrib for new zlib functions
-- Update zlib version number in some contrib entries
-- Add Windows CE definition for ptrdiff_t in zutil.h [Mai, Truta]
-- Support shared libraries on Hurd and KFreeBSD [Brown]
-- Fix error in NO_DIVIDE option of adler32.c
-
-Changes in 1.2.0.8 (4 November 2003)
-- Update version in contrib/delphi/ZLib.pas and contrib/pascal/zlibpas.pas
-- Add experimental NO_DIVIDE #define in adler32.c
-    - Possibly faster on some processors (let me know if it is)
-- Correct Z_BLOCK to not return on first inflate call if no wrap
-- Fix strm->data_type on inflate() return to correctly indicate EOB
-- Add deflatePrime() function for appending in the middle of a byte
-- Add contrib/gzappend for an example of appending to a stream
-- Update win32/DLL_FAQ.txt [Truta]
-- Delete Turbo C comment in README [Truta]
-- Improve some indentation in zconf.h [Truta]
-- Fix infinite loop on bad input in configure script [Church]
-- Fix gzeof() for concatenated gzip files [Johnson]
-- Add example to contrib/visual-basic.txt [Michael B.]
-- Add -p to mkdir's in Makefile.in [vda]
-- Fix configure to properly detect presence or lack of printf functions
-- Add AS400 support [Monnerat]
-- Add a little Cygwin support [Wilson]
-
-Changes in 1.2.0.7 (21 September 2003)
-- Correct some debug formats in contrib/infback9
-- Cast a type in a debug statement in trees.c
-- Change search and replace delimiter in configure from % to # [Beebe]
-- Update contrib/untgz to 0.2 with various fixes [Truta]
-- Add build support for Amiga [Nikl]
-- Remove some directories in old that have been updated to 1.2
-- Add dylib building for Mac OS X in configure and Makefile.in
-- Remove old distribution stuff from Makefile
-- Update README to point to DLL_FAQ.txt, and add comment on Mac OS X
-- Update links in README
-
-Changes in 1.2.0.6 (13 September 2003)
-- Minor FAQ updates
-- Update contrib/minizip to 1.00 [Vollant]
-- Remove test of gz functions in example.c when GZ_COMPRESS defined [Truta]
-- Update POSTINC comment for 68060 [Nikl]
-- Add contrib/infback9 with deflate64 decoding (unsupported)
-- For MVS define NO_vsnprintf and undefine FAR [van Burik]
-- Add pragma for fdopen on MVS [van Burik]
-
-Changes in 1.2.0.5 (8 September 2003)
-- Add OF to inflateBackEnd() declaration in zlib.h
-- Remember start when using gzdopen in the middle of a file
-- Use internal off_t counters in gz* functions to properly handle seeks
-- Perform more rigorous check for distance-too-far in inffast.c
-- Add Z_BLOCK flush option to return from inflate at block boundary
-- Set strm->data_type on return from inflate
-    - Indicate bits unused, if at block boundary, and if in last block
-- Replace size_t with ptrdiff_t in crc32.c, and check for correct size
-- Add condition so old NO_DEFLATE define still works for compatibility
-- FAQ update regarding the Windows DLL [Truta]
-- INDEX update: add qnx entry, remove aix entry [Truta]
-- Install zlib.3 into mandir [Wilson]
-- Move contrib/zlib_dll_FAQ.txt to win32/DLL_FAQ.txt; update [Truta]
-- Adapt the zlib interface to the new DLL convention guidelines [Truta]
-- Introduce ZLIB_WINAPI macro to allow the export of functions using
-  the WINAPI calling convention, for Visual Basic [Vollant, Truta]
-- Update msdos and win32 scripts and makefiles [Truta]
-- Export symbols by name, not by ordinal, in win32/zlib.def [Truta]
-- Add contrib/ada [Anisimkov]
-- Move asm files from contrib/vstudio/vc70_32 to contrib/asm386 [Truta]
-- Rename contrib/asm386 to contrib/masmx86 [Truta, Vollant]
-- Add contrib/masm686 [Truta]
-- Fix offsets in contrib/inflate86 and contrib/masmx86/inffas32.asm
-  [Truta, Vollant]
-- Update contrib/delphi; rename to contrib/pascal; add example [Truta]
-- Remove contrib/delphi2; add a new contrib/delphi [Truta]
-- Avoid inclusion of the nonstandard <memory.h> in contrib/iostream,
-  and fix some method prototypes [Truta]
-- Fix the ZCR_SEED2 constant to avoid warnings in contrib/minizip
-  [Truta]
-- Avoid the use of backslash (\) in contrib/minizip [Vollant]
-- Fix file time handling in contrib/untgz; update makefiles [Truta]
-- Update contrib/vstudio/vc70_32 to comply with the new DLL guidelines
-  [Vollant]
-- Remove contrib/vstudio/vc15_16 [Vollant]
-- Rename contrib/vstudio/vc70_32 to contrib/vstudio/vc7 [Truta]
-- Update README.contrib [Truta]
-- Invert the assignment order of match_head and s->prev[...] in
-  INSERT_STRING [Truta]
-- Compare TOO_FAR with 32767 instead of 32768, to avoid 16-bit warnings
-  [Truta]
-- Compare function pointers with 0, not with NULL or Z_NULL [Truta]
-- Fix prototype of syncsearch in inflate.c [Truta]
-- Introduce ASMINF macro to be enabled when using an ASM implementation
-  of inflate_fast [Truta]
-- Change NO_DEFLATE to NO_GZCOMPRESS [Truta]
-- Modify test_gzio in example.c to take a single file name as a
-  parameter [Truta]
-- Exit the example.c program if gzopen fails [Truta]
-- Add type casts around strlen in example.c [Truta]
-- Remove casting to sizeof in minigzip.c; give a proper type
-  to the variable compared with SUFFIX_LEN [Truta]
-- Update definitions of STDC and STDC99 in zconf.h [Truta]
-- Synchronize zconf.h with the new Windows DLL interface [Truta]
-- Use SYS16BIT instead of __32BIT__ to distinguish between
-  16- and 32-bit platforms [Truta]
-- Use far memory allocators in small 16-bit memory models for
-  Turbo C [Truta]
-- Add info about the use of ASMV, ASMINF and ZLIB_WINAPI in
-  zlibCompileFlags [Truta]
-- Cygwin has vsnprintf [Wilson]
-- In Windows16, OS_CODE is 0, as in MSDOS [Truta]
-- In Cygwin, OS_CODE is 3 (Unix), not 11 (Windows32) [Wilson]
-
-Changes in 1.2.0.4 (10 August 2003)
-- Minor FAQ updates
-- Be more strict when checking inflateInit2's windowBits parameter
-- Change NO_GUNZIP compile option to NO_GZIP to cover deflate as well
-- Add gzip wrapper option to deflateInit2 using windowBits
-- Add updated QNX rule in configure and qnx directory [Bonnefoy]
-- Make inflate distance-too-far checks more rigorous
-- Clean up FAR usage in inflate
-- Add casting to sizeof() in gzio.c and minigzip.c
-
-Changes in 1.2.0.3 (19 July 2003)
-- Fix silly error in gzungetc() implementation [Vollant]
-- Update contrib/minizip and contrib/vstudio [Vollant]
-- Fix printf format in example.c
-- Correct cdecl support in zconf.in.h [Anisimkov]
-- Minor FAQ updates
-
-Changes in 1.2.0.2 (13 July 2003)
-- Add ZLIB_VERNUM in zlib.h for numerical preprocessor comparisons
-- Attempt to avoid warnings in crc32.c for pointer-int conversion
-- Add AIX to configure, remove aix directory [Bakker]
-- Add some casts to minigzip.c
-- Improve checking after insecure sprintf() or vsprintf() calls
-- Remove #elif's from crc32.c
-- Change leave label to inf_leave in inflate.c and infback.c to avoid
-  library conflicts
-- Remove inflate gzip decoding by default--only enable gzip decoding by
-  special request for stricter backward compatibility
-- Add zlibCompileFlags() function to return compilation information
-- More typecasting in deflate.c to avoid warnings
-- Remove leading underscore from _Capital #defines [Truta]
-- Fix configure to link shared library when testing
-- Add some Windows CE target adjustments [Mai]
-- Remove #define ZLIB_DLL in zconf.h [Vollant]
-- Add zlib.3 [Rodgers]
-- Update RFC URL in deflate.c and algorithm.txt [Mai]
-- Add zlib_dll_FAQ.txt to contrib [Truta]
-- Add UL to some constants [Truta]
-- Update minizip and vstudio [Vollant]
-- Remove vestigial NEED_DUMMY_RETURN from zconf.in.h
-- Expand use of NO_DUMMY_DECL to avoid all dummy structures
-- Added iostream3 to contrib [Schwardt]
-- Replace rewind() with fseek() for WinCE [Truta]
-- Improve setting of zlib format compression level flags
-    - Report 0 for huffman and rle strategies and for level == 0 or 1
-    - Report 2 only for level == 6
-- Only deal with 64K limit when necessary at compile time [Truta]
-- Allow TOO_FAR check to be turned off at compile time [Truta]
-- Add gzclearerr() function [Souza]
-- Add gzungetc() function
-
-Changes in 1.2.0.1 (17 March 2003)
-- Add Z_RLE strategy for run-length encoding [Truta]
-    - When Z_RLE requested, restrict matches to distance one
-    - Update zlib.h, minigzip.c, gzopen(), gzdopen() for Z_RLE
-- Correct FASTEST compilation to allow level == 0
-- Clean up what gets compiled for FASTEST
-- Incorporate changes to zconf.in.h [Vollant]
-    - Refine detection of Turbo C need for dummy returns
-    - Refine ZLIB_DLL compilation
-    - Include additional header file on VMS for off_t typedef
-- Try to use _vsnprintf where it supplants vsprintf [Vollant]
-- Add some casts in inffast.c
-- Enchance comments in zlib.h on what happens if gzprintf() tries to
-  write more than 4095 bytes before compression
-- Remove unused state from inflateBackEnd()
-- Remove exit(0) from minigzip.c, example.c
-- Get rid of all those darn tabs
-- Add "check" target to Makefile.in that does the same thing as "test"
-- Add "mostlyclean" and "maintainer-clean" targets to Makefile.in
-- Update contrib/inflate86 [Anderson]
-- Update contrib/testzlib, contrib/vstudio, contrib/minizip [Vollant]
-- Add msdos and win32 directories with makefiles [Truta]
-- More additions and improvements to the FAQ
-
-Changes in 1.2.0 (9 March 2003)
-- New and improved inflate code
-    - About 20% faster
-    - Does not allocate 32K window unless and until needed
-    - Automatically detects and decompresses gzip streams
-    - Raw inflate no longer needs an extra dummy byte at end
-    - Added inflateBack functions using a callback interface--even faster
-      than inflate, useful for file utilities (gzip, zip)
-    - Added inflateCopy() function to record state for random access on
-      externally generated deflate streams (e.g. in gzip files)
-    - More readable code (I hope)
-- New and improved crc32()
-    - About 50% faster, thanks to suggestions from Rodney Brown
-- Add deflateBound() and compressBound() functions
-- Fix memory leak in deflateInit2()
-- Permit setting dictionary for raw deflate (for parallel deflate)
-- Fix const declaration for gzwrite()
-- Check for some malloc() failures in gzio.c
-- Fix bug in gzopen() on single-byte file 0x1f
-- Fix bug in gzread() on concatenated file with 0x1f at end of buffer
-  and next buffer doesn't start with 0x8b
-- Fix uncompress() to return Z_DATA_ERROR on truncated input
-- Free memory at end of example.c
-- Remove MAX #define in trees.c (conflicted with some libraries)
-- Fix static const's in deflate.c, gzio.c, and zutil.[ch]
-- Declare malloc() and free() in gzio.c if STDC not defined
-- Use malloc() instead of calloc() in zutil.c if int big enough
-- Define STDC for AIX
-- Add aix/ with approach for compiling shared library on AIX
-- Add HP-UX support for shared libraries in configure
-- Add OpenUNIX support for shared libraries in configure
-- Use $cc instead of gcc to build shared library
-- Make prefix directory if needed when installing
-- Correct Macintosh avoidance of typedef Byte in zconf.h
-- Correct Turbo C memory allocation when under Linux
-- Use libz.a instead of -lz in Makefile (assure use of compiled library)
-- Update configure to check for snprintf or vsnprintf functions and their
-  return value, warn during make if using an insecure function
-- Fix configure problem with compile-time knowledge of HAVE_UNISTD_H that
-  is lost when library is used--resolution is to build new zconf.h
-- Documentation improvements (in zlib.h):
-    - Document raw deflate and inflate
-    - Update RFCs URL
-    - Point out that zlib and gzip formats are different
-    - Note that Z_BUF_ERROR is not fatal
-    - Document string limit for gzprintf() and possible buffer overflow
-    - Note requirement on avail_out when flushing
-    - Note permitted values of flush parameter of inflate()
-- Add some FAQs (and even answers) to the FAQ
-- Add contrib/inflate86/ for x86 faster inflate
-- Add contrib/blast/ for PKWare Data Compression Library decompression
-- Add contrib/puff/ simple inflate for deflate format description
-
-Changes in 1.1.4 (11 March 2002)
-- ZFREE was repeated on same allocation on some error conditions.
-  This creates a security problem described in
-  http://www.zlib.org/advisory-2002-03-11.txt
-- Returned incorrect error (Z_MEM_ERROR) on some invalid data
-- Avoid accesses before window for invalid distances with inflate window
-  less than 32K.
-- force windowBits > 8 to avoid a bug in the encoder for a window size
-  of 256 bytes. (A complete fix will be available in 1.1.5).
-
-Changes in 1.1.3 (9 July 1998)
-- fix "an inflate input buffer bug that shows up on rare but persistent
-  occasions" (Mark)
-- fix gzread and gztell for concatenated .gz files (Didier Le Botlan)
-- fix gzseek(..., SEEK_SET) in write mode
-- fix crc check after a gzeek (Frank Faubert)
-- fix miniunzip when the last entry in a zip file is itself a zip file
-  (J Lillge)
-- add contrib/asm586 and contrib/asm686 (Brian Raiter)
-  See http://www.muppetlabs.com/~breadbox/software/assembly.html
-- add support for Delphi 3 in contrib/delphi (Bob Dellaca)
-- add support for C++Builder 3 and Delphi 3 in contrib/delphi2 (Davide Moretti)
-- do not exit prematurely in untgz if 0 at start of block (Magnus Holmgren)
-- use macro EXTERN instead of extern to support DLL for BeOS (Sander Stoks)
-- added a FAQ file
-
-- Support gzdopen on Mac with Metrowerks (Jason Linhart)
-- Do not redefine Byte on Mac (Brad Pettit & Jason Linhart)
-- define SEEK_END too if SEEK_SET is not defined (Albert Chin-A-Young)
-- avoid some warnings with Borland C (Tom Tanner)
-- fix a problem in contrib/minizip/zip.c for 16-bit MSDOS (Gilles Vollant)
-- emulate utime() for WIN32 in contrib/untgz  (Gilles Vollant)
-- allow several arguments to configure (Tim Mooney, Frodo Looijaard)
-- use libdir and includedir in Makefile.in (Tim Mooney)
-- support shared libraries on OSF1 V4 (Tim Mooney)
-- remove so_locations in "make clean"  (Tim Mooney)
-- fix maketree.c compilation error (Glenn, Mark)
-- Python interface to zlib now in Python 1.5 (Jeremy Hylton)
-- new Makefile.riscos (Rich Walker)
-- initialize static descriptors in trees.c for embedded targets (Nick Smith)
-- use "foo-gz" in example.c for RISCOS and VMS (Nick Smith)
-- add the OS/2 files in Makefile.in too (Andrew Zabolotny)
-- fix fdopen and halloc macros for Microsoft C 6.0 (Tom Lane)
-- fix maketree.c to allow clean compilation of inffixed.h (Mark)
-- fix parameter check in deflateCopy (Gunther Nikl)
-- cleanup trees.c, use compressed_len only in debug mode (Christian Spieler)
-- Many portability patches by Christian Spieler:
-  . zutil.c, zutil.h: added "const" for zmem*
-  . Make_vms.com: fixed some typos
-  . Make_vms.com: msdos/Makefile.*: removed zutil.h from some dependency lists
-  . msdos/Makefile.msc: remove "default rtl link library" info from obj files
-  . msdos/Makefile.*: use model-dependent name for the built zlib library
-  . msdos/Makefile.emx, nt/Makefile.emx, nt/Makefile.gcc:
-     new makefiles, for emx (DOS/OS2), emx&rsxnt and mingw32 (Windows 9x / NT)
-- use define instead of typedef for Bytef also for MSC small/medium (Tom Lane)
-- replace __far with _far for better portability (Christian Spieler, Tom Lane)
-- fix test for errno.h in configure (Tim Newsham)
-
-Changes in 1.1.2 (19 March 98)
-- added contrib/minzip, mini zip and unzip based on zlib (Gilles Vollant)
-  See http://www.winimage.com/zLibDll/unzip.html
-- preinitialize the inflate tables for fixed codes, to make the code
-  completely thread safe (Mark)
-- some simplifications and slight speed-up to the inflate code (Mark)
-- fix gzeof on non-compressed files (Allan Schrum)
-- add -std1 option in configure for OSF1 to fix gzprintf (Martin Mokrejs)
-- use default value of 4K for Z_BUFSIZE for 16-bit MSDOS (Tim Wegner + Glenn)
-- added os2/Makefile.def and os2/zlib.def (Andrew Zabolotny)
-- add shared lib support for UNIX_SV4.2MP (MATSUURA Takanori)
-- do not wrap extern "C" around system includes (Tom Lane)
-- mention zlib binding for TCL in README (Andreas Kupries)
-- added amiga/Makefile.pup for Amiga powerUP SAS/C PPC (Andreas Kleinert)
-- allow "make install prefix=..." even after configure (Glenn Randers-Pehrson)
-- allow "configure --prefix $HOME" (Tim Mooney)
-- remove warnings in example.c and gzio.c (Glenn Randers-Pehrson)
-- move Makefile.sas to amiga/Makefile.sas
-
-Changes in 1.1.1 (27 Feb 98)
-- fix macros _tr_tally_* in deflate.h for debug mode  (Glenn Randers-Pehrson)
-- remove block truncation heuristic which had very marginal effect for zlib
-  (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the
-  compression ratio on some files. This also allows inlining _tr_tally for
-  matches in deflate_slow.
-- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier)
-
-Changes in 1.1.0 (24 Feb 98)
-- do not return STREAM_END prematurely in inflate (John Bowler)
-- revert to the zlib 1.0.8 inflate to avoid the gcc 2.8.0 bug (Jeremy Buhler)
-- compile with -DFASTEST to get compression code optimized for speed only
-- in minigzip, try mmap'ing the input file first (Miguel Albrecht)
-- increase size of I/O buffers in minigzip.c and gzio.c (not a big gain
-  on Sun but significant on HP)
-
-- add a pointer to experimental unzip library in README (Gilles Vollant)
-- initialize variable gcc in configure (Chris Herborth)
-
-Changes in 1.0.9 (17 Feb 1998)
-- added gzputs and gzgets functions
-- do not clear eof flag in gzseek (Mark Diekhans)
-- fix gzseek for files in transparent mode (Mark Diekhans)
-- do not assume that vsprintf returns the number of bytes written (Jens Krinke)
-- replace EXPORT with ZEXPORT to avoid conflict with other programs
-- added compress2 in zconf.h, zlib.def, zlib.dnt
-- new asm code from Gilles Vollant in contrib/asm386
-- simplify the inflate code (Mark):
- . Replace ZALLOC's in huft_build() with single ZALLOC in inflate_blocks_new()
- . ZALLOC the length list in inflate_trees_fixed() instead of using stack
- . ZALLOC the value area for huft_build() instead of using stack
- . Simplify Z_FINISH check in inflate()
-
-- Avoid gcc 2.8.0 comparison bug a little differently than zlib 1.0.8
-- in inftrees.c, avoid cc -O bug on HP (Farshid Elahi)
-- in zconf.h move the ZLIB_DLL stuff earlier to avoid problems with
-  the declaration of FAR (Gilles VOllant)
-- install libz.so* with mode 755 (executable) instead of 644 (Marc Lehmann)
-- read_buf buf parameter of type Bytef* instead of charf*
-- zmemcpy parameters are of type Bytef*, not charf* (Joseph Strout)
-- do not redeclare unlink in minigzip.c for WIN32 (John Bowler)
-- fix check for presence of directories in "make install" (Ian Willis)
-
-Changes in 1.0.8 (27 Jan 1998)
-- fixed offsets in contrib/asm386/gvmat32.asm (Gilles Vollant)
-- fix gzgetc and gzputc for big endian systems (Markus Oberhumer)
-- added compress2() to allow setting the compression level
-- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong)
-- use constant arrays for the static trees in trees.c instead of computing
-  them at run time (thanks to Ken Raeburn for this suggestion). To create
-  trees.h, compile with GEN_TREES_H and run "make test".
-- check return code of example in "make test" and display result
-- pass minigzip command line options to file_compress
-- simplifying code of inflateSync to avoid gcc 2.8 bug
-
-- support CC="gcc -Wall" in configure -s (QingLong)
-- avoid a flush caused by ftell in gzopen for write mode (Ken Raeburn)
-- fix test for shared library support to avoid compiler warnings
-- zlib.lib -> zlib.dll in msdos/zlib.rc (Gilles Vollant)
-- check for TARGET_OS_MAC in addition to MACOS (Brad Pettit)
-- do not use fdopen for Metrowerks on Mac (Brad Pettit))
-- add checks for gzputc and gzputc in example.c
-- avoid warnings in gzio.c and deflate.c (Andreas Kleinert)
-- use const for the CRC table (Ken Raeburn)
-- fixed "make uninstall" for shared libraries
-- use Tracev instead of Trace in infblock.c
-- in example.c use correct compressed length for test_sync
-- suppress +vnocompatwarnings in configure for HPUX (not always supported)
-
-Changes in 1.0.7 (20 Jan 1998)
-- fix gzseek which was broken in write mode
-- return error for gzseek to negative absolute position
-- fix configure for Linux (Chun-Chung Chen)
-- increase stack space for MSC (Tim Wegner)
-- get_crc_table and inflateSyncPoint are EXPORTed (Gilles Vollant)
-- define EXPORTVA for gzprintf (Gilles Vollant)
-- added man page zlib.3 (Rick Rodgers)
-- for contrib/untgz, fix makedir() and improve Makefile
-
-- check gzseek in write mode in example.c
-- allocate extra buffer for seeks only if gzseek is actually called
-- avoid signed/unsigned comparisons (Tim Wegner, Gilles Vollant)
-- add inflateSyncPoint in zconf.h
-- fix list of exported functions in nt/zlib.dnt and mdsos/zlib.def
-
-Changes in 1.0.6 (19 Jan 1998)
-- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and
-  gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code)
-- Fix a deflate bug occurring only with compression level 0 (thanks to
-  Andy Buckler for finding this one).
-- In minigzip, pass transparently also the first byte for .Z files.
-- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress()
-- check Z_FINISH in inflate (thanks to Marc Schluper)
-- Implement deflateCopy (thanks to Adam Costello)
-- make static libraries by default in configure, add --shared option.
-- move MSDOS or Windows specific files to directory msdos
-- suppress the notion of partial flush to simplify the interface
-  (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4)
-- suppress history buffer provided by application to simplify the interface
-  (this feature was not implemented anyway in 1.0.4)
-- next_in and avail_in must be initialized before calling inflateInit or
-  inflateInit2
-- add EXPORT in all exported functions (for Windows DLL)
-- added Makefile.nt (thanks to Stephen Williams)
-- added the unsupported "contrib" directory:
-   contrib/asm386/ by Gilles Vollant <info@winimage.com>
-        386 asm code replacing longest_match().
-   contrib/iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
-        A C++ I/O streams interface to the zlib gz* functions
-   contrib/iostream2/  by Tyge Løvset <Tyge.Lovset@cmr.no>
-        Another C++ I/O streams interface
-   contrib/untgz/  by "Pedro A. Aranda Guti\irrez" <paag@tid.es>
-        A very simple tar.gz file extractor using zlib
-   contrib/visual-basic.txt by Carlos Rios <c_rios@sonda.cl>
-        How to use compress(), uncompress() and the gz* functions from VB.
-- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression
-  level) in minigzip (thanks to Tom Lane)
-
-- use const for rommable constants in deflate
-- added test for gzseek and gztell in example.c
-- add undocumented function inflateSyncPoint() (hack for Paul Mackerras)
-- add undocumented function zError to convert error code to string
-  (for Tim Smithers)
-- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code.
-- Use default memcpy for Symantec MSDOS compiler.
-- Add EXPORT keyword for check_func (needed for Windows DLL)
-- add current directory to LD_LIBRARY_PATH for "make test"
-- create also a link for libz.so.1
-- added support for FUJITSU UXP/DS (thanks to Toshiaki Nomura)
-- use $(SHAREDLIB) instead of libz.so in Makefile.in (for HPUX)
-- added -soname for Linux in configure (Chun-Chung Chen,
-- assign numbers to the exported functions in zlib.def (for Windows DLL)
-- add advice in zlib.h for best usage of deflateSetDictionary
-- work around compiler bug on Atari (cast Z_NULL in call of s->checkfn)
-- allow compilation with ANSI keywords only enabled for TurboC in large model
-- avoid "versionString"[0] (Borland bug)
-- add NEED_DUMMY_RETURN for Borland
-- use variable z_verbose for tracing in debug mode (L. Peter Deutsch).
-- allow compilation with CC
-- defined STDC for OS/2 (David Charlap)
-- limit external names to 8 chars for MVS (Thomas Lund)
-- in minigzip.c, use static buffers only for 16-bit systems
-- fix suffix check for "minigzip -d foo.gz"
-- do not return an error for the 2nd of two consecutive gzflush() (Felix Lee)
-- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau)
-- added makelcc.bat for lcc-win32 (Tom St Denis)
-- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe)
-- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion.
-- check for unistd.h in configure (for off_t)
-- remove useless check parameter in inflate_blocks_free
-- avoid useless assignment of s->check to itself in inflate_blocks_new
-- do not flush twice in gzclose (thanks to Ken Raeburn)
-- rename FOPEN as F_OPEN to avoid clash with /usr/include/sys/file.h
-- use NO_ERRNO_H instead of enumeration of operating systems with errno.h
-- work around buggy fclose on pipes for HP/UX
-- support zlib DLL with BORLAND C++ 5.0 (thanks to Glenn Randers-Pehrson)
-- fix configure if CC is already equal to gcc
-
-Changes in 1.0.5 (3 Jan 98)
-- Fix inflate to terminate gracefully when fed corrupted or invalid data
-- Use const for rommable constants in inflate
-- Eliminate memory leaks on error conditions in inflate
-- Removed some vestigial code in inflate
-- Update web address in README
-
-Changes in 1.0.4 (24 Jul 96)
-- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF
-  bit, so the decompressor could decompress all the correct data but went
-  on to attempt decompressing extra garbage data. This affected minigzip too.
-- zlibVersion and gzerror return const char* (needed for DLL)
-- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno)
-- use z_error only for DEBUG (avoid problem with DLLs)
-
-Changes in 1.0.3 (2 Jul 96)
-- use z_streamp instead of z_stream *, which is now a far pointer in MSDOS
-  small and medium models; this makes the library incompatible with previous
-  versions for these models. (No effect in large model or on other systems.)
-- return OK instead of BUF_ERROR if previous deflate call returned with
-  avail_out as zero but there is nothing to do
-- added memcmp for non STDC compilers
-- define NO_DUMMY_DECL for more Mac compilers (.h files merged incorrectly)
-- define __32BIT__ if __386__ or i386 is defined (pb. with Watcom and SCO)
-- better check for 16-bit mode MSC (avoids problem with Symantec)
-
-Changes in 1.0.2 (23 May 96)
-- added Windows DLL support
-- added a function zlibVersion (for the DLL support)
-- fixed declarations using Bytef in infutil.c (pb with MSDOS medium model)
-- Bytef is define's instead of typedef'd only for Borland C
-- avoid reading uninitialized memory in example.c
-- mention in README that the zlib format is now RFC1950
-- updated Makefile.dj2
-- added algorithm.doc
-
-Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion]
-- fix array overlay in deflate.c which sometimes caused bad compressed data
-- fix inflate bug with empty stored block
-- fix MSDOS medium model which was broken in 0.99
-- fix deflateParams() which could generate bad compressed data.
-- Bytef is define'd instead of typedef'ed (work around Borland bug)
-- added an INDEX file
-- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32),
-  Watcom (Makefile.wat), Amiga SAS/C (Makefile.sas)
-- speed up adler32 for modern machines without auto-increment
-- added -ansi for IRIX in configure
-- static_init_done in trees.c is an int
-- define unlink as delete for VMS
-- fix configure for QNX
-- add configure branch for SCO and HPUX
-- avoid many warnings (unused variables, dead assignments, etc...)
-- no fdopen for BeOS
-- fix the Watcom fix for 32 bit mode (define FAR as empty)
-- removed redefinition of Byte for MKWERKS
-- work around an MWKERKS bug (incorrect merge of all .h files)
-
-Changes in 0.99 (27 Jan 96)
-- allow preset dictionary shared between compressor and decompressor
-- allow compression level 0 (no compression)
-- add deflateParams in zlib.h: allow dynamic change of compression level
-  and compression strategy.
-- test large buffers and deflateParams in example.c
-- add optional "configure" to build zlib as a shared library
-- suppress Makefile.qnx, use configure instead
-- fixed deflate for 64-bit systems (detected on Cray)
-- fixed inflate_blocks for 64-bit systems (detected on Alpha)
-- declare Z_DEFLATED in zlib.h (possible parameter for deflateInit2)
-- always return Z_BUF_ERROR when deflate() has nothing to do
-- deflateInit and inflateInit are now macros to allow version checking
-- prefix all global functions and types with z_ with -DZ_PREFIX
-- make falloc completely reentrant (inftrees.c)
-- fixed very unlikely race condition in ct_static_init
-- free in reverse order of allocation to help memory manager
-- use zlib-1.0/* instead of zlib/* inside the tar.gz
-- make zlib warning-free with "gcc -O3 -Wall -Wwrite-strings -Wpointer-arith
-  -Wconversion -Wstrict-prototypes -Wmissing-prototypes"
-- allow gzread on concatenated .gz files
-- deflateEnd now returns Z_DATA_ERROR if it was premature
-- deflate is finally (?) fully deterministic (no matches beyond end of input)
-- Document Z_SYNC_FLUSH
-- add uninstall in Makefile
-- Check for __cpluplus in zlib.h
-- Better test in ct_align for partial flush
-- avoid harmless warnings for Borland C++
-- initialize hash_head in deflate.c
-- avoid warning on fdopen (gzio.c) for HP cc -Aa
-- include stdlib.h for STDC compilers
-- include errno.h for Cray
-- ignore error if ranlib doesn't exist
-- call ranlib twice for NeXTSTEP
-- use exec_prefix instead of prefix for libz.a
-- renamed ct_* as _tr_* to avoid conflict with applications
-- clear z->msg in inflateInit2 before any error return
-- initialize opaque in example.c, gzio.c, deflate.c and inflate.c
-- fixed typo in zconf.h (_GNUC__ => __GNUC__)
-- check for WIN32 in zconf.h and zutil.c (avoid farmalloc in 32-bit mode)
-- fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
-- in fcalloc, normalize pointer if size > 65520 bytes
-- don't use special fcalloc for 32 bit Borland C++
-- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
-- use Z_BINARY instead of BINARY
-- document that gzclose after gzdopen will close the file
-- allow "a" as mode in gzopen.
-- fix error checking in gzread
-- allow skipping .gz extra-field on pipes
-- added reference to Perl interface in README
-- put the crc table in FAR data (I dislike more and more the medium model :)
-- added get_crc_table
-- added a dimension to all arrays (Borland C can't count).
-- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast
-- guard against multiple inclusion of *.h (for precompiled header on Mac)
-- Watcom C pretends to be Microsoft C small model even in 32 bit mode.
-- don't use unsized arrays to avoid silly warnings by Visual C++:
-     warning C4746: 'inflate_mask' : unsized array treated as  '__far'
-     (what's wrong with far data in far model?).
-- define enum out of inflate_blocks_state to allow compilation with C++
-
-Changes in 0.95 (16 Aug 95)
-- fix MSDOS small and medium model (now easier to adapt to any compiler)
-- inlined send_bits
-- fix the final (:-) bug for deflate with flush (output was correct but
-  not completely flushed in rare occasions).
-- default window size is same for compression and decompression
-  (it's now sufficient to set MAX_WBITS in zconf.h).
-- voidp -> voidpf and voidnp -> voidp (for consistency with other
-  typedefs and because voidnp was not near in large model).
-
-Changes in 0.94 (13 Aug 95)
-- support MSDOS medium model
-- fix deflate with flush (could sometimes generate bad output)
-- fix deflateReset (zlib header was incorrectly suppressed)
-- added support for VMS
-- allow a compression level in gzopen()
-- gzflush now calls fflush
-- For deflate with flush, flush even if no more input is provided.
-- rename libgz.a as libz.a
-- avoid complex expression in infcodes.c triggering Turbo C bug
-- work around a problem with gcc on Alpha (in INSERT_STRING)
-- don't use inline functions (problem with some gcc versions)
-- allow renaming of Byte, uInt, etc... with #define.
-- avoid warning about (unused) pointer before start of array in deflate.c
-- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c
-- avoid reserved word 'new' in trees.c
-
-Changes in 0.93 (25 June 95)
-- temporarily disable inline functions
-- make deflate deterministic
-- give enough lookahead for PARTIAL_FLUSH
-- Set binary mode for stdin/stdout in minigzip.c for OS/2
-- don't even use signed char in inflate (not portable enough)
-- fix inflate memory leak for segmented architectures
-
-Changes in 0.92 (3 May 95)
-- don't assume that char is signed (problem on SGI)
-- Clear bit buffer when starting a stored block
-- no memcpy on Pyramid
-- suppressed inftest.c
-- optimized fill_window, put longest_match inline for gcc
-- optimized inflate on stored blocks.
-- untabify all sources to simplify patches
-
-Changes in 0.91 (2 May 95)
-- Default MEM_LEVEL is 8 (not 9 for Unix) as documented in zlib.h
-- Document the memory requirements in zconf.h
-- added "make install"
-- fix sync search logic in inflateSync
-- deflate(Z_FULL_FLUSH) now works even if output buffer too short
-- after inflateSync, don't scare people with just "lo world"
-- added support for DJGPP
-
-Changes in 0.9 (1 May 95)
-- don't assume that zalloc clears the allocated memory (the TurboC bug
-  was Mark's bug after all :)
-- let again gzread copy uncompressed data unchanged (was working in 0.71)
-- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented
-- added a test of inflateSync in example.c
-- moved MAX_WBITS to zconf.h because users might want to change that.
-- document explicitly that zalloc(64K) on MSDOS must return a normalized
-  pointer (zero offset)
-- added Makefiles for Microsoft C, Turbo C, Borland C++
-- faster crc32()
-
-Changes in 0.8 (29 April 95)
-- added fast inflate (inffast.c)
-- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this
-  is incompatible with previous versions of zlib which returned Z_OK.
-- work around a TurboC compiler bug (bad code for b << 0, see infutil.h)
-  (actually that was not a compiler bug, see 0.81 above)
-- gzread no longer reads one extra byte in certain cases
-- In gzio destroy(), don't reference a freed structure
-- avoid many warnings for MSDOS
-- avoid the ERROR symbol which is used by MS Windows
-
-Changes in 0.71 (14 April 95)
-- Fixed more MSDOS compilation problems :( There is still a bug with
-  TurboC large model.
-
-Changes in 0.7 (14 April 95)
-- Added full inflate support.
-- Simplified the crc32() interface. The pre- and post-conditioning
-  (one's complement) is now done inside crc32(). WARNING: this is
-  incompatible with previous versions; see zlib.h for the new usage.
-
-Changes in 0.61 (12 April 95)
-- workaround for a bug in TurboC. example and minigzip now work on MSDOS.
-
-Changes in 0.6 (11 April 95)
-- added minigzip.c
-- added gzdopen to reopen a file descriptor as gzFile
-- added transparent reading of non-gziped files in gzread.
-- fixed bug in gzread (don't read crc as data)
-- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose).
-- don't allocate big arrays in the stack (for MSDOS)
-- fix some MSDOS compilation problems
-
-Changes in 0.5:
-- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but
-  not yet Z_FULL_FLUSH.
-- support decompression but only in a single step (forced Z_FINISH)
-- added opaque object for zalloc and zfree.
-- added deflateReset and inflateReset
-- added a variable zlib_version for consistency checking.
-- renamed the 'filter' parameter of deflateInit2 as 'strategy'.
-  Added Z_FILTERED and Z_HUFFMAN_ONLY constants.
-
-Changes in 0.4:
-- avoid "zip" everywhere, use zlib instead of ziplib.
-- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush
-  if compression method == 8.
-- added adler32 and crc32
-- renamed deflateOptions as deflateInit2, call one or the other but not both
-- added the method parameter for deflateInit2.
-- added inflateInit2
-- simplied considerably deflateInit and inflateInit by not supporting
-  user-provided history buffer. This is supported only in deflateInit2
-  and inflateInit2.
-
-Changes in 0.3:
-- prefix all macro names with Z_
-- use Z_FINISH instead of deflateEnd to finish compression.
-- added Z_HUFFMAN_ONLY
-- added gzerror()
diff --git a/dep/zlib/FAQ b/dep/zlib/FAQ
deleted file mode 100644
index 99b7cf92e..000000000
--- a/dep/zlib/FAQ
+++ /dev/null
@@ -1,368 +0,0 @@
-
-                Frequently Asked Questions about zlib
-
-
-If your question is not there, please check the zlib home page
-http://zlib.net/ which may have more recent information.
-The lastest zlib FAQ is at http://zlib.net/zlib_faq.html
-
-
- 1. Is zlib Y2K-compliant?
-
-    Yes. zlib doesn't handle dates.
-
- 2. Where can I get a Windows DLL version?
-
-    The zlib sources can be compiled without change to produce a DLL.  See the
-    file win32/DLL_FAQ.txt in the zlib distribution.  Pointers to the
-    precompiled DLL are found in the zlib web site at http://zlib.net/ .
-
- 3. Where can I get a Visual Basic interface to zlib?
-
-    See
-        * http://marknelson.us/1997/01/01/zlib-engine/
-        * win32/DLL_FAQ.txt in the zlib distribution
-
- 4. compress() returns Z_BUF_ERROR.
-
-    Make sure that before the call of compress(), the length of the compressed
-    buffer is equal to the available size of the compressed buffer and not
-    zero.  For Visual Basic, check that this parameter is passed by reference
-    ("as any"), not by value ("as long").
-
- 5. deflate() or inflate() returns Z_BUF_ERROR.
-
-    Before making the call, make sure that avail_in and avail_out are not zero.
-    When setting the parameter flush equal to Z_FINISH, also make sure that
-    avail_out is big enough to allow processing all pending input.  Note that a
-    Z_BUF_ERROR is not fatal--another call to deflate() or inflate() can be
-    made with more input or output space.  A Z_BUF_ERROR may in fact be
-    unavoidable depending on how the functions are used, since it is not
-    possible to tell whether or not there is more output pending when
-    strm.avail_out returns with zero.  See http://zlib.net/zlib_how.html for a
-    heavily annotated example.
-
- 6. Where's the zlib documentation (man pages, etc.)?
-
-    It's in zlib.h .  Examples of zlib usage are in the files test/example.c
-    and test/minigzip.c, with more in examples/ .
-
- 7. Why don't you use GNU autoconf or libtool or ...?
-
-    Because we would like to keep zlib as a very small and simple package.
-    zlib is rather portable and doesn't need much configuration.
-
- 8. I found a bug in zlib.
-
-    Most of the time, such problems are due to an incorrect usage of zlib.
-    Please try to reproduce the problem with a small program and send the
-    corresponding source to us at zlib@gzip.org .  Do not send multi-megabyte
-    data files without prior agreement.
-
- 9. Why do I get "undefined reference to gzputc"?
-
-    If "make test" produces something like
-
-       example.o(.text+0x154): undefined reference to `gzputc'
-
-    check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
-    /usr/X11R6/lib. Remove any old versions, then do "make install".
-
-10. I need a Delphi interface to zlib.
-
-    See the contrib/delphi directory in the zlib distribution.
-
-11. Can zlib handle .zip archives?
-
-    Not by itself, no.  See the directory contrib/minizip in the zlib
-    distribution.
-
-12. Can zlib handle .Z files?
-
-    No, sorry.  You have to spawn an uncompress or gunzip subprocess, or adapt
-    the code of uncompress on your own.
-
-13. How can I make a Unix shared library?
-
-    By default a shared (and a static) library is built for Unix.  So:
-
-    make distclean
-    ./configure
-    make
-
-14. How do I install a shared zlib library on Unix?
-
-    After the above, then:
-
-    make install
-
-    However, many flavors of Unix come with a shared zlib already installed.
-    Before going to the trouble of compiling a shared version of zlib and
-    trying to install it, you may want to check if it's already there!  If you
-    can #include <zlib.h>, it's there.  The -lz option will probably link to
-    it.  You can check the version at the top of zlib.h or with the
-    ZLIB_VERSION symbol defined in zlib.h .
-
-15. I have a question about OttoPDF.
-
-    We are not the authors of OttoPDF. The real author is on the OttoPDF web
-    site: Joel Hainley, jhainley@myndkryme.com.
-
-16. Can zlib decode Flate data in an Adobe PDF file?
-
-    Yes. See http://www.pdflib.com/ . To modify PDF forms, see
-    http://sourceforge.net/projects/acroformtool/ .
-
-17. Why am I getting this "register_frame_info not found" error on Solaris?
-
-    After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
-    generates an error such as:
-
-        ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
-        symbol __register_frame_info: referenced symbol not found
-
-    The symbol __register_frame_info is not part of zlib, it is generated by
-    the C compiler (cc or gcc).  You must recompile applications using zlib
-    which have this problem.  This problem is specific to Solaris.  See
-    http://www.sunfreeware.com for Solaris versions of zlib and applications
-    using zlib.
-
-18. Why does gzip give an error on a file I make with compress/deflate?
-
-    The compress and deflate functions produce data in the zlib format, which
-    is different and incompatible with the gzip format.  The gz* functions in
-    zlib on the other hand use the gzip format.  Both the zlib and gzip formats
-    use the same compressed data format internally, but have different headers
-    and trailers around the compressed data.
-
-19. Ok, so why are there two different formats?
-
-    The gzip format was designed to retain the directory information about a
-    single file, such as the name and last modification date.  The zlib format
-    on the other hand was designed for in-memory and communication channel
-    applications, and has a much more compact header and trailer and uses a
-    faster integrity check than gzip.
-
-20. Well that's nice, but how do I make a gzip file in memory?
-
-    You can request that deflate write the gzip format instead of the zlib
-    format using deflateInit2().  You can also request that inflate decode the
-    gzip format using inflateInit2().  Read zlib.h for more details.
-
-21. Is zlib thread-safe?
-
-    Yes.  However any library routines that zlib uses and any application-
-    provided memory allocation routines must also be thread-safe.  zlib's gz*
-    functions use stdio library routines, and most of zlib's functions use the
-    library memory allocation routines by default.  zlib's *Init* functions
-    allow for the application to provide custom memory allocation routines.
-
-    Of course, you should only operate on any given zlib or gzip stream from a
-    single thread at a time.
-
-22. Can I use zlib in my commercial application?
-
-    Yes.  Please read the license in zlib.h.
-
-23. Is zlib under the GNU license?
-
-    No.  Please read the license in zlib.h.
-
-24. The license says that altered source versions must be "plainly marked". So
-    what exactly do I need to do to meet that requirement?
-
-    You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h.  In
-    particular, the final version number needs to be changed to "f", and an
-    identification string should be appended to ZLIB_VERSION.  Version numbers
-    x.x.x.f are reserved for modifications to zlib by others than the zlib
-    maintainers.  For example, if the version of the base zlib you are altering
-    is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
-    ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3".  You can also
-    update the version strings in deflate.c and inftrees.c.
-
-    For altered source distributions, you should also note the origin and
-    nature of the changes in zlib.h, as well as in ChangeLog and README, along
-    with the dates of the alterations.  The origin should include at least your
-    name (or your company's name), and an email address to contact for help or
-    issues with the library.
-
-    Note that distributing a compiled zlib library along with zlib.h and
-    zconf.h is also a source distribution, and so you should change
-    ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
-    in zlib.h as you would for a full source distribution.
-
-25. Will zlib work on a big-endian or little-endian architecture, and can I
-    exchange compressed data between them?
-
-    Yes and yes.
-
-26. Will zlib work on a 64-bit machine?
-
-    Yes.  It has been tested on 64-bit machines, and has no dependence on any
-    data types being limited to 32-bits in length.  If you have any
-    difficulties, please provide a complete problem report to zlib@gzip.org
-
-27. Will zlib decompress data from the PKWare Data Compression Library?
-
-    No.  The PKWare DCL uses a completely different compressed data format than
-    does PKZIP and zlib.  However, you can look in zlib's contrib/blast
-    directory for a possible solution to your problem.
-
-28. Can I access data randomly in a compressed stream?
-
-    No, not without some preparation.  If when compressing you periodically use
-    Z_FULL_FLUSH, carefully write all the pending data at those points, and
-    keep an index of those locations, then you can start decompression at those
-    points.  You have to be careful to not use Z_FULL_FLUSH too often, since it
-    can significantly degrade compression.  Alternatively, you can scan a
-    deflate stream once to generate an index, and then use that index for
-    random access.  See examples/zran.c .
-
-29. Does zlib work on MVS, OS/390, CICS, etc.?
-
-    It has in the past, but we have not heard of any recent evidence.  There
-    were working ports of zlib 1.1.4 to MVS, but those links no longer work.
-    If you know of recent, successful applications of zlib on these operating
-    systems, please let us know.  Thanks.
-
-30. Is there some simpler, easier to read version of inflate I can look at to
-    understand the deflate format?
-
-    First off, you should read RFC 1951.  Second, yes.  Look in zlib's
-    contrib/puff directory.
-
-31. Does zlib infringe on any patents?
-
-    As far as we know, no.  In fact, that was originally the whole point behind
-    zlib.  Look here for some more information:
-
-    http://www.gzip.org/#faq11
-
-32. Can zlib work with greater than 4 GB of data?
-
-    Yes.  inflate() and deflate() will process any amount of data correctly.
-    Each call of inflate() or deflate() is limited to input and output chunks
-    of the maximum value that can be stored in the compiler's "unsigned int"
-    type, but there is no limit to the number of chunks.  Note however that the
-    strm.total_in and strm_total_out counters may be limited to 4 GB.  These
-    counters are provided as a convenience and are not used internally by
-    inflate() or deflate().  The application can easily set up its own counters
-    updated after each call of inflate() or deflate() to count beyond 4 GB.
-    compress() and uncompress() may be limited to 4 GB, since they operate in a
-    single call.  gzseek() and gztell() may be limited to 4 GB depending on how
-    zlib is compiled.  See the zlibCompileFlags() function in zlib.h.
-
-    The word "may" appears several times above since there is a 4 GB limit only
-    if the compiler's "long" type is 32 bits.  If the compiler's "long" type is
-    64 bits, then the limit is 16 exabytes.
-
-33. Does zlib have any security vulnerabilities?
-
-    The only one that we are aware of is potentially in gzprintf().  If zlib is
-    compiled to use sprintf() or vsprintf(), then there is no protection
-    against a buffer overflow of an 8K string space (or other value as set by
-    gzbuffer()), other than the caller of gzprintf() assuring that the output
-    will not exceed 8K.  On the other hand, if zlib is compiled to use
-    snprintf() or vsnprintf(), which should normally be the case, then there is
-    no vulnerability.  The ./configure script will display warnings if an
-    insecure variation of sprintf() will be used by gzprintf().  Also the
-    zlibCompileFlags() function will return information on what variant of
-    sprintf() is used by gzprintf().
-
-    If you don't have snprintf() or vsnprintf() and would like one, you can
-    find a portable implementation here:
-
-        http://www.ijs.si/software/snprintf/
-
-    Note that you should be using the most recent version of zlib.  Versions
-    1.1.3 and before were subject to a double-free vulnerability, and versions
-    1.2.1 and 1.2.2 were subject to an access exception when decompressing
-    invalid compressed data.
-
-34. Is there a Java version of zlib?
-
-    Probably what you want is to use zlib in Java. zlib is already included
-    as part of the Java SDK in the java.util.zip package. If you really want
-    a version of zlib written in the Java language, look on the zlib home
-    page for links: http://zlib.net/ .
-
-35. I get this or that compiler or source-code scanner warning when I crank it
-    up to maximally-pedantic. Can't you guys write proper code?
-
-    Many years ago, we gave up attempting to avoid warnings on every compiler
-    in the universe.  It just got to be a waste of time, and some compilers
-    were downright silly as well as contradicted each other.  So now, we simply
-    make sure that the code always works.
-
-36. Valgrind (or some similar memory access checker) says that deflate is
-    performing a conditional jump that depends on an uninitialized value.
-    Isn't that a bug?
-
-    No.  That is intentional for performance reasons, and the output of deflate
-    is not affected.  This only started showing up recently since zlib 1.2.x
-    uses malloc() by default for allocations, whereas earlier versions used
-    calloc(), which zeros out the allocated memory.  Even though the code was
-    correct, versions 1.2.4 and later was changed to not stimulate these
-    checkers.
-
-37. Will zlib read the (insert any ancient or arcane format here) compressed
-    data format?
-
-    Probably not. Look in the comp.compression FAQ for pointers to various
-    formats and associated software.
-
-38. How can I encrypt/decrypt zip files with zlib?
-
-    zlib doesn't support encryption.  The original PKZIP encryption is very
-    weak and can be broken with freely available programs.  To get strong
-    encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib
-    compression.  For PKZIP compatible "encryption", look at
-    http://www.info-zip.org/
-
-39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
-
-    "gzip" is the gzip format, and "deflate" is the zlib format.  They should
-    probably have called the second one "zlib" instead to avoid confusion with
-    the raw deflate compressed data format.  While the HTTP 1.1 RFC 2616
-    correctly points to the zlib specification in RFC 1950 for the "deflate"
-    transfer encoding, there have been reports of servers and browsers that
-    incorrectly produce or expect raw deflate data per the deflate
-    specification in RFC 1951, most notably Microsoft.  So even though the
-    "deflate" transfer encoding using the zlib format would be the more
-    efficient approach (and in fact exactly what the zlib format was designed
-    for), using the "gzip" transfer encoding is probably more reliable due to
-    an unfortunate choice of name on the part of the HTTP 1.1 authors.
-
-    Bottom line: use the gzip format for HTTP 1.1 encoding.
-
-40. Does zlib support the new "Deflate64" format introduced by PKWare?
-
-    No.  PKWare has apparently decided to keep that format proprietary, since
-    they have not documented it as they have previous compression formats.  In
-    any case, the compression improvements are so modest compared to other more
-    modern approaches, that it's not worth the effort to implement.
-
-41. I'm having a problem with the zip functions in zlib, can you help?
-
-    There are no zip functions in zlib.  You are probably using minizip by
-    Giles Vollant, which is found in the contrib directory of zlib.  It is not
-    part of zlib.  In fact none of the stuff in contrib is part of zlib.  The
-    files in there are not supported by the zlib authors.  You need to contact
-    the authors of the respective contribution for help.
-
-42. The match.asm code in contrib is under the GNU General Public License.
-    Since it's part of zlib, doesn't that mean that all of zlib falls under the
-    GNU GPL?
-
-    No.  The files in contrib are not part of zlib.  They were contributed by
-    other authors and are provided as a convenience to the user within the zlib
-    distribution.  Each item in contrib has its own license.
-
-43. Is zlib subject to export controls?  What is its ECCN?
-
-    zlib is not subject to export controls, and so is classified as EAR99.
-
-44. Can you please sign these lengthy legal documents and fax them back to us
-    so that we can use your software in our product?
-
-    No. Go away. Shoo.
diff --git a/dep/zlib/README b/dep/zlib/README
deleted file mode 100644
index 51106de47..000000000
--- a/dep/zlib/README
+++ /dev/null
@@ -1,115 +0,0 @@
-ZLIB DATA COMPRESSION LIBRARY
-
-zlib 1.2.11 is a general purpose data compression library.  All the code is
-thread safe.  The data format used by the zlib library is described by RFCs
-(Request for Comments) 1950 to 1952 in the files
-http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
-rfc1952 (gzip format).
-
-All functions of the compression library are documented in the file zlib.h
-(volunteer to write man pages welcome, contact zlib@gzip.org).  A usage example
-of the library is given in the file test/example.c which also tests that
-the library is working correctly.  Another example is given in the file
-test/minigzip.c.  The compression library itself is composed of all source
-files in the root directory.
-
-To compile all files and run the test program, follow the instructions given at
-the top of Makefile.in.  In short "./configure; make test", and if that goes
-well, "make install" should work for most flavors of Unix.  For Windows, use
-one of the special makefiles in win32/ or contrib/vstudio/ .  For VMS, use
-make_vms.com.
-
-Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
-<info@winimage.com> for the Windows DLL version.  The zlib home page is
-http://zlib.net/ .  Before reporting a problem, please check this site to
-verify that you have the latest version of zlib; otherwise get the latest
-version and check whether the problem still exists or not.
-
-PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
-
-Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
-issue of Dr.  Dobb's Journal; a copy of the article is available at
-http://marknelson.us/1997/01/01/zlib-engine/ .
-
-The changes made in version 1.2.11 are documented in the file ChangeLog.
-
-Unsupported third party contributions are provided in directory contrib/ .
-
-zlib is available in Java using the java.util.zip package, documented at
-http://java.sun.com/developer/technicalArticles/Programming/compression/ .
-
-A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
-at CPAN (Comprehensive Perl Archive Network) sites, including
-http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
-
-A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
-available in Python 1.5 and later versions, see
-http://docs.python.org/library/zlib.html .
-
-zlib is built into tcl: http://wiki.tcl.tk/4610 .
-
-An experimental package to read and write files in .zip format, written on top
-of zlib by Gilles Vollant <info@winimage.com>, is available in the
-contrib/minizip directory of zlib.
-
-
-Notes for some targets:
-
-- For Windows DLL versions, please see win32/DLL_FAQ.txt
-
-- For 64-bit Irix, deflate.c must be compiled without any optimization. With
-  -O, one libpng test fails. The test works in 32 bit mode (with the -n32
-  compiler flag). The compiler bug has been reported to SGI.
-
-- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
-  when compiled with cc.
-
-- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
-  necessary to get gzprintf working correctly. This is done by configure.
-
-- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
-  other compilers. Use "make test" to check your compiler.
-
-- gzdopen is not supported on RISCOS or BEOS.
-
-- For PalmOs, see http://palmzlib.sourceforge.net/
-
-
-Acknowledgments:
-
-  The deflate format used by zlib was defined by Phil Katz.  The deflate and
-  zlib specifications were written by L.  Peter Deutsch.  Thanks to all the
-  people who reported problems and suggested various improvements in zlib; they
-  are too numerous to cite here.
-
-Copyright notice:
-
- (C) 1995-2017 Jean-loup Gailly and Mark Adler
-
-  This software is provided 'as-is', without any express or implied
-  warranty.  In no event will the authors be held liable for any damages
-  arising from the use of this software.
-
-  Permission is granted to anyone to use this software for any purpose,
-  including commercial applications, and to alter it and redistribute it
-  freely, subject to the following restrictions:
-
-  1. The origin of this software must not be misrepresented; you must not
-     claim that you wrote the original software. If you use this software
-     in a product, an acknowledgment in the product documentation would be
-     appreciated but is not required.
-  2. Altered source versions must be plainly marked as such, and must not be
-     misrepresented as being the original software.
-  3. This notice may not be removed or altered from any source distribution.
-
-  Jean-loup Gailly        Mark Adler
-  jloup@gzip.org          madler@alumni.caltech.edu
-
-If you use the zlib library in a product, we would appreciate *not* receiving
-lengthy legal documents to sign.  The sources are provided for free but without
-warranty of any kind.  The library has been entirely written by Jean-loup
-Gailly and Mark Adler; it does not include third-party code.
-
-If you redistribute modified sources, we would appreciate that you include in
-the file ChangeLog history information documenting your changes.  Please read
-the FAQ for more information on the distribution of modified source versions.
diff --git a/dep/zlib/include/zconf.h b/dep/zlib/include/zconf.h
deleted file mode 100644
index 5e1d68a00..000000000
--- a/dep/zlib/include/zconf.h
+++ /dev/null
@@ -1,534 +0,0 @@
-/* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#ifndef ZCONF_H
-#define ZCONF_H
-
-/*
- * If you *really* need a unique prefix for all types and library functions,
- * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
- * Even better than compiling with -DZ_PREFIX would be to use configure to set
- * this permanently in zconf.h using "./configure --zprefix".
- */
-#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
-#  define Z_PREFIX_SET
-
-/* all linked symbols and init macros */
-#  define _dist_code            z__dist_code
-#  define _length_code          z__length_code
-#  define _tr_align             z__tr_align
-#  define _tr_flush_bits        z__tr_flush_bits
-#  define _tr_flush_block       z__tr_flush_block
-#  define _tr_init              z__tr_init
-#  define _tr_stored_block      z__tr_stored_block
-#  define _tr_tally             z__tr_tally
-#  define adler32               z_adler32
-#  define adler32_combine       z_adler32_combine
-#  define adler32_combine64     z_adler32_combine64
-#  define adler32_z             z_adler32_z
-#  ifndef Z_SOLO
-#    define compress              z_compress
-#    define compress2             z_compress2
-#    define compressBound         z_compressBound
-#  endif
-#  define crc32                 z_crc32
-#  define crc32_combine         z_crc32_combine
-#  define crc32_combine64       z_crc32_combine64
-#  define crc32_z               z_crc32_z
-#  define deflate               z_deflate
-#  define deflateBound          z_deflateBound
-#  define deflateCopy           z_deflateCopy
-#  define deflateEnd            z_deflateEnd
-#  define deflateGetDictionary  z_deflateGetDictionary
-#  define deflateInit           z_deflateInit
-#  define deflateInit2          z_deflateInit2
-#  define deflateInit2_         z_deflateInit2_
-#  define deflateInit_          z_deflateInit_
-#  define deflateParams         z_deflateParams
-#  define deflatePending        z_deflatePending
-#  define deflatePrime          z_deflatePrime
-#  define deflateReset          z_deflateReset
-#  define deflateResetKeep      z_deflateResetKeep
-#  define deflateSetDictionary  z_deflateSetDictionary
-#  define deflateSetHeader      z_deflateSetHeader
-#  define deflateTune           z_deflateTune
-#  define deflate_copyright     z_deflate_copyright
-#  define get_crc_table         z_get_crc_table
-#  ifndef Z_SOLO
-#    define gz_error              z_gz_error
-#    define gz_intmax             z_gz_intmax
-#    define gz_strwinerror        z_gz_strwinerror
-#    define gzbuffer              z_gzbuffer
-#    define gzclearerr            z_gzclearerr
-#    define gzclose               z_gzclose
-#    define gzclose_r             z_gzclose_r
-#    define gzclose_w             z_gzclose_w
-#    define gzdirect              z_gzdirect
-#    define gzdopen               z_gzdopen
-#    define gzeof                 z_gzeof
-#    define gzerror               z_gzerror
-#    define gzflush               z_gzflush
-#    define gzfread               z_gzfread
-#    define gzfwrite              z_gzfwrite
-#    define gzgetc                z_gzgetc
-#    define gzgetc_               z_gzgetc_
-#    define gzgets                z_gzgets
-#    define gzoffset              z_gzoffset
-#    define gzoffset64            z_gzoffset64
-#    define gzopen                z_gzopen
-#    define gzopen64              z_gzopen64
-#    ifdef _WIN32
-#      define gzopen_w              z_gzopen_w
-#    endif
-#    define gzprintf              z_gzprintf
-#    define gzputc                z_gzputc
-#    define gzputs                z_gzputs
-#    define gzread                z_gzread
-#    define gzrewind              z_gzrewind
-#    define gzseek                z_gzseek
-#    define gzseek64              z_gzseek64
-#    define gzsetparams           z_gzsetparams
-#    define gztell                z_gztell
-#    define gztell64              z_gztell64
-#    define gzungetc              z_gzungetc
-#    define gzvprintf             z_gzvprintf
-#    define gzwrite               z_gzwrite
-#  endif
-#  define inflate               z_inflate
-#  define inflateBack           z_inflateBack
-#  define inflateBackEnd        z_inflateBackEnd
-#  define inflateBackInit       z_inflateBackInit
-#  define inflateBackInit_      z_inflateBackInit_
-#  define inflateCodesUsed      z_inflateCodesUsed
-#  define inflateCopy           z_inflateCopy
-#  define inflateEnd            z_inflateEnd
-#  define inflateGetDictionary  z_inflateGetDictionary
-#  define inflateGetHeader      z_inflateGetHeader
-#  define inflateInit           z_inflateInit
-#  define inflateInit2          z_inflateInit2
-#  define inflateInit2_         z_inflateInit2_
-#  define inflateInit_          z_inflateInit_
-#  define inflateMark           z_inflateMark
-#  define inflatePrime          z_inflatePrime
-#  define inflateReset          z_inflateReset
-#  define inflateReset2         z_inflateReset2
-#  define inflateResetKeep      z_inflateResetKeep
-#  define inflateSetDictionary  z_inflateSetDictionary
-#  define inflateSync           z_inflateSync
-#  define inflateSyncPoint      z_inflateSyncPoint
-#  define inflateUndermine      z_inflateUndermine
-#  define inflateValidate       z_inflateValidate
-#  define inflate_copyright     z_inflate_copyright
-#  define inflate_fast          z_inflate_fast
-#  define inflate_table         z_inflate_table
-#  ifndef Z_SOLO
-#    define uncompress            z_uncompress
-#    define uncompress2           z_uncompress2
-#  endif
-#  define zError                z_zError
-#  ifndef Z_SOLO
-#    define zcalloc               z_zcalloc
-#    define zcfree                z_zcfree
-#  endif
-#  define zlibCompileFlags      z_zlibCompileFlags
-#  define zlibVersion           z_zlibVersion
-
-/* all zlib typedefs in zlib.h and zconf.h */
-#  define Byte                  z_Byte
-#  define Bytef                 z_Bytef
-#  define alloc_func            z_alloc_func
-#  define charf                 z_charf
-#  define free_func             z_free_func
-#  ifndef Z_SOLO
-#    define gzFile                z_gzFile
-#  endif
-#  define gz_header             z_gz_header
-#  define gz_headerp            z_gz_headerp
-#  define in_func               z_in_func
-#  define intf                  z_intf
-#  define out_func              z_out_func
-#  define uInt                  z_uInt
-#  define uIntf                 z_uIntf
-#  define uLong                 z_uLong
-#  define uLongf                z_uLongf
-#  define voidp                 z_voidp
-#  define voidpc                z_voidpc
-#  define voidpf                z_voidpf
-
-/* all zlib structs in zlib.h and zconf.h */
-#  define gz_header_s           z_gz_header_s
-#  define internal_state        z_internal_state
-
-#endif
-
-#if defined(__MSDOS__) && !defined(MSDOS)
-#  define MSDOS
-#endif
-#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
-#  define OS2
-#endif
-#if defined(_WINDOWS) && !defined(WINDOWS)
-#  define WINDOWS
-#endif
-#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
-#  ifndef WIN32
-#    define WIN32
-#  endif
-#endif
-#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
-#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
-#    ifndef SYS16BIT
-#      define SYS16BIT
-#    endif
-#  endif
-#endif
-
-/*
- * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
- * than 64k bytes at a time (needed on systems with 16-bit int).
- */
-#ifdef SYS16BIT
-#  define MAXSEG_64K
-#endif
-#ifdef MSDOS
-#  define UNALIGNED_OK
-#endif
-
-#ifdef __STDC_VERSION__
-#  ifndef STDC
-#    define STDC
-#  endif
-#  if __STDC_VERSION__ >= 199901L
-#    ifndef STDC99
-#      define STDC99
-#    endif
-#  endif
-#endif
-#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
-#  define STDC
-#endif
-#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
-#  define STDC
-#endif
-#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
-#  define STDC
-#endif
-#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
-#  define STDC
-#endif
-
-#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
-#  define STDC
-#endif
-
-#ifndef STDC
-#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
-#    define const       /* note: need a more gentle solution here */
-#  endif
-#endif
-
-#if defined(ZLIB_CONST) && !defined(z_const)
-#  define z_const const
-#else
-#  define z_const
-#endif
-
-#ifdef Z_SOLO
-   typedef unsigned long z_size_t;
-#else
-#  define z_longlong long long
-#  if defined(NO_SIZE_T)
-     typedef unsigned NO_SIZE_T z_size_t;
-#  elif defined(STDC)
-#    include <stddef.h>
-     typedef size_t z_size_t;
-#  else
-     typedef unsigned long z_size_t;
-#  endif
-#  undef z_longlong
-#endif
-
-/* Maximum value for memLevel in deflateInit2 */
-#ifndef MAX_MEM_LEVEL
-#  ifdef MAXSEG_64K
-#    define MAX_MEM_LEVEL 8
-#  else
-#    define MAX_MEM_LEVEL 9
-#  endif
-#endif
-
-/* Maximum value for windowBits in deflateInit2 and inflateInit2.
- * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
- * created by gzip. (Files created by minigzip can still be extracted by
- * gzip.)
- */
-#ifndef MAX_WBITS
-#  define MAX_WBITS   15 /* 32K LZ77 window */
-#endif
-
-/* The memory requirements for deflate are (in bytes):
-            (1 << (windowBits+2)) +  (1 << (memLevel+9))
- that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
- plus a few kilobytes for small objects. For example, if you want to reduce
- the default memory requirements from 256K to 128K, compile with
-     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
- Of course this will generally degrade compression (there's no free lunch).
-
-   The memory requirements for inflate are (in bytes) 1 << windowBits
- that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
- for small objects.
-*/
-
-                        /* Type declarations */
-
-#ifndef OF /* function prototypes */
-#  ifdef STDC
-#    define OF(args)  args
-#  else
-#    define OF(args)  ()
-#  endif
-#endif
-
-#ifndef Z_ARG /* function prototypes for stdarg */
-#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#    define Z_ARG(args)  args
-#  else
-#    define Z_ARG(args)  ()
-#  endif
-#endif
-
-/* The following definitions for FAR are needed only for MSDOS mixed
- * model programming (small or medium model with some far allocations).
- * This was tested only with MSC; for other MSDOS compilers you may have
- * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
- * just define FAR to be empty.
- */
-#ifdef SYS16BIT
-#  if defined(M_I86SM) || defined(M_I86MM)
-     /* MSC small or medium model */
-#    define SMALL_MEDIUM
-#    ifdef _MSC_VER
-#      define FAR _far
-#    else
-#      define FAR far
-#    endif
-#  endif
-#  if (defined(__SMALL__) || defined(__MEDIUM__))
-     /* Turbo C small or medium model */
-#    define SMALL_MEDIUM
-#    ifdef __BORLANDC__
-#      define FAR _far
-#    else
-#      define FAR far
-#    endif
-#  endif
-#endif
-
-#if defined(WINDOWS) || defined(WIN32)
-   /* If building or using zlib as a DLL, define ZLIB_DLL.
-    * This is not mandatory, but it offers a little performance increase.
-    */
-#  ifdef ZLIB_DLL
-#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
-#      ifdef ZLIB_INTERNAL
-#        define ZEXTERN extern __declspec(dllexport)
-#      else
-#        define ZEXTERN extern __declspec(dllimport)
-#      endif
-#    endif
-#  endif  /* ZLIB_DLL */
-   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
-    * define ZLIB_WINAPI.
-    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
-    */
-#  ifdef ZLIB_WINAPI
-#    ifdef FAR
-#      undef FAR
-#    endif
-#    include <windows.h>
-     /* No need for _export, use ZLIB.DEF instead. */
-     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
-#    define ZEXPORT WINAPI
-#    ifdef WIN32
-#      define ZEXPORTVA WINAPIV
-#    else
-#      define ZEXPORTVA FAR CDECL
-#    endif
-#  endif
-#endif
-
-#if defined (__BEOS__)
-#  ifdef ZLIB_DLL
-#    ifdef ZLIB_INTERNAL
-#      define ZEXPORT   __declspec(dllexport)
-#      define ZEXPORTVA __declspec(dllexport)
-#    else
-#      define ZEXPORT   __declspec(dllimport)
-#      define ZEXPORTVA __declspec(dllimport)
-#    endif
-#  endif
-#endif
-
-#ifndef ZEXTERN
-#  define ZEXTERN extern
-#endif
-#ifndef ZEXPORT
-#  define ZEXPORT
-#endif
-#ifndef ZEXPORTVA
-#  define ZEXPORTVA
-#endif
-
-#ifndef FAR
-#  define FAR
-#endif
-
-#if !defined(__MACTYPES__)
-typedef unsigned char  Byte;  /* 8 bits */
-#endif
-typedef unsigned int   uInt;  /* 16 bits or more */
-typedef unsigned long  uLong; /* 32 bits or more */
-
-#ifdef SMALL_MEDIUM
-   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
-#  define Bytef Byte FAR
-#else
-   typedef Byte  FAR Bytef;
-#endif
-typedef char  FAR charf;
-typedef int   FAR intf;
-typedef uInt  FAR uIntf;
-typedef uLong FAR uLongf;
-
-#ifdef STDC
-   typedef void const *voidpc;
-   typedef void FAR   *voidpf;
-   typedef void       *voidp;
-#else
-   typedef Byte const *voidpc;
-   typedef Byte FAR   *voidpf;
-   typedef Byte       *voidp;
-#endif
-
-#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
-#  include <limits.h>
-#  if (UINT_MAX == 0xffffffffUL)
-#    define Z_U4 unsigned
-#  elif (ULONG_MAX == 0xffffffffUL)
-#    define Z_U4 unsigned long
-#  elif (USHRT_MAX == 0xffffffffUL)
-#    define Z_U4 unsigned short
-#  endif
-#endif
-
-#ifdef Z_U4
-   typedef Z_U4 z_crc_t;
-#else
-   typedef unsigned long z_crc_t;
-#endif
-
-#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
-#  define Z_HAVE_UNISTD_H
-#endif
-
-#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
-#  define Z_HAVE_STDARG_H
-#endif
-
-#ifdef STDC
-#  ifndef Z_SOLO
-#    include <sys/types.h>      /* for off_t */
-#  endif
-#endif
-
-#if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#  ifndef Z_SOLO
-#    include <stdarg.h>         /* for va_list */
-#  endif
-#endif
-
-#ifdef _WIN32
-#  ifndef Z_SOLO
-#    include <stddef.h>         /* for wchar_t */
-#  endif
-#endif
-
-/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
- * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
- * though the former does not conform to the LFS document), but considering
- * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
- * equivalently requesting no 64-bit operations
- */
-#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
-#  undef _LARGEFILE64_SOURCE
-#endif
-
-#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
-#  define Z_HAVE_UNISTD_H
-#endif
-#ifndef Z_SOLO
-#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
-#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
-#    ifdef VMS
-#      include <unixio.h>       /* for off_t */
-#    endif
-#    ifndef z_off_t
-#      define z_off_t off_t
-#    endif
-#  endif
-#endif
-
-#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
-#  define Z_LFS64
-#endif
-
-#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
-#  define Z_LARGE64
-#endif
-
-#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
-#  define Z_WANT64
-#endif
-
-#if !defined(SEEK_SET) && !defined(Z_SOLO)
-#  define SEEK_SET        0       /* Seek from beginning of file.  */
-#  define SEEK_CUR        1       /* Seek from current position.  */
-#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
-#endif
-
-#ifndef z_off_t
-#  define z_off_t long
-#endif
-
-#if !defined(_WIN32) && defined(Z_LARGE64)
-#  define z_off64_t off64_t
-#else
-#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
-#    define z_off64_t __int64
-#  else
-#    define z_off64_t z_off_t
-#  endif
-#endif
-
-/* MVS linker does not support external names larger than 8 bytes */
-#if defined(__MVS__)
-  #pragma map(deflateInit_,"DEIN")
-  #pragma map(deflateInit2_,"DEIN2")
-  #pragma map(deflateEnd,"DEEND")
-  #pragma map(deflateBound,"DEBND")
-  #pragma map(inflateInit_,"ININ")
-  #pragma map(inflateInit2_,"ININ2")
-  #pragma map(inflateEnd,"INEND")
-  #pragma map(inflateSync,"INSY")
-  #pragma map(inflateSetDictionary,"INSEDI")
-  #pragma map(compressBound,"CMBND")
-  #pragma map(inflate_table,"INTABL")
-  #pragma map(inflate_fast,"INFA")
-  #pragma map(inflate_copyright,"INCOPY")
-#endif
-
-#endif /* ZCONF_H */
diff --git a/dep/zlib/include/zlib.h b/dep/zlib/include/zlib.h
deleted file mode 100644
index f09cdaf1e..000000000
--- a/dep/zlib/include/zlib.h
+++ /dev/null
@@ -1,1912 +0,0 @@
-/* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.11, January 15th, 2017
-
-  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
-
-  This software is provided 'as-is', without any express or implied
-  warranty.  In no event will the authors be held liable for any damages
-  arising from the use of this software.
-
-  Permission is granted to anyone to use this software for any purpose,
-  including commercial applications, and to alter it and redistribute it
-  freely, subject to the following restrictions:
-
-  1. The origin of this software must not be misrepresented; you must not
-     claim that you wrote the original software. If you use this software
-     in a product, an acknowledgment in the product documentation would be
-     appreciated but is not required.
-  2. Altered source versions must be plainly marked as such, and must not be
-     misrepresented as being the original software.
-  3. This notice may not be removed or altered from any source distribution.
-
-  Jean-loup Gailly        Mark Adler
-  jloup@gzip.org          madler@alumni.caltech.edu
-
-
-  The data format used by the zlib library is described by RFCs (Request for
-  Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
-  (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
-*/
-
-#ifndef ZLIB_H
-#define ZLIB_H
-
-#include "zconf.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ZLIB_VERSION "1.2.11"
-#define ZLIB_VERNUM 0x12b0
-#define ZLIB_VER_MAJOR 1
-#define ZLIB_VER_MINOR 2
-#define ZLIB_VER_REVISION 11
-#define ZLIB_VER_SUBREVISION 0
-
-/*
-    The 'zlib' compression library provides in-memory compression and
-  decompression functions, including integrity checks of the uncompressed data.
-  This version of the library supports only one compression method (deflation)
-  but other algorithms will be added later and will have the same stream
-  interface.
-
-    Compression can be done in a single step if the buffers are large enough,
-  or can be done by repeated calls of the compression function.  In the latter
-  case, the application must provide more input and/or consume the output
-  (providing more output space) before each call.
-
-    The compressed data format used by default by the in-memory functions is
-  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
-  around a deflate stream, which is itself documented in RFC 1951.
-
-    The library also supports reading and writing files in gzip (.gz) format
-  with an interface similar to that of stdio using the functions that start
-  with "gz".  The gzip format is different from the zlib format.  gzip is a
-  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
-
-    This library can optionally read and write gzip and raw deflate streams in
-  memory as well.
-
-    The zlib format was designed to be compact and fast for use in memory
-  and on communications channels.  The gzip format was designed for single-
-  file compression on file systems, has a larger header than zlib to maintain
-  directory information, and uses a different, slower check method than zlib.
-
-    The library does not install any signal handler.  The decoder checks
-  the consistency of the compressed data, so the library should never crash
-  even in the case of corrupted input.
-*/
-
-typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
-
-struct internal_state;
-
-typedef struct z_stream_s {
-    z_const Bytef *next_in;     /* next input byte */
-    uInt     avail_in;  /* number of bytes available at next_in */
-    uLong    total_in;  /* total number of input bytes read so far */
-
-    Bytef    *next_out; /* next output byte will go here */
-    uInt     avail_out; /* remaining free space at next_out */
-    uLong    total_out; /* total number of bytes output so far */
-
-    z_const char *msg;  /* last error message, NULL if no error */
-    struct internal_state FAR *state; /* not visible by applications */
-
-    alloc_func zalloc;  /* used to allocate the internal state */
-    free_func  zfree;   /* used to free the internal state */
-    voidpf     opaque;  /* private data object passed to zalloc and zfree */
-
-    int     data_type;  /* best guess about the data type: binary or text
-                           for deflate, or the decoding state for inflate */
-    uLong   adler;      /* Adler-32 or CRC-32 value of the uncompressed data */
-    uLong   reserved;   /* reserved for future use */
-} z_stream;
-
-typedef z_stream FAR *z_streamp;
-
-/*
-     gzip header information passed to and from zlib routines.  See RFC 1952
-  for more details on the meanings of these fields.
-*/
-typedef struct gz_header_s {
-    int     text;       /* true if compressed data believed to be text */
-    uLong   time;       /* modification time */
-    int     xflags;     /* extra flags (not used when writing a gzip file) */
-    int     os;         /* operating system */
-    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
-    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
-    uInt    extra_max;  /* space at extra (only when reading header) */
-    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
-    uInt    name_max;   /* space at name (only when reading header) */
-    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
-    uInt    comm_max;   /* space at comment (only when reading header) */
-    int     hcrc;       /* true if there was or will be a header crc */
-    int     done;       /* true when done reading gzip header (not used
-                           when writing a gzip file) */
-} gz_header;
-
-typedef gz_header FAR *gz_headerp;
-
-/*
-     The application must update next_in and avail_in when avail_in has dropped
-   to zero.  It must update next_out and avail_out when avail_out has dropped
-   to zero.  The application must initialize zalloc, zfree and opaque before
-   calling the init function.  All other fields are set by the compression
-   library and must not be updated by the application.
-
-     The opaque value provided by the application will be passed as the first
-   parameter for calls of zalloc and zfree.  This can be useful for custom
-   memory management.  The compression library attaches no meaning to the
-   opaque value.
-
-     zalloc must return Z_NULL if there is not enough memory for the object.
-   If zlib is used in a multi-threaded application, zalloc and zfree must be
-   thread safe.  In that case, zlib is thread-safe.  When zalloc and zfree are
-   Z_NULL on entry to the initialization function, they are set to internal
-   routines that use the standard library functions malloc() and free().
-
-     On 16-bit systems, the functions zalloc and zfree must be able to allocate
-   exactly 65536 bytes, but will not be required to allocate more than this if
-   the symbol MAXSEG_64K is defined (see zconf.h).  WARNING: On MSDOS, pointers
-   returned by zalloc for objects of exactly 65536 bytes *must* have their
-   offset normalized to zero.  The default allocation function provided by this
-   library ensures this (see zutil.c).  To reduce memory requirements and avoid
-   any allocation of 64K objects, at the expense of compression ratio, compile
-   the library with -DMAX_WBITS=14 (see zconf.h).
-
-     The fields total_in and total_out can be used for statistics or progress
-   reports.  After compression, total_in holds the total size of the
-   uncompressed data and may be saved for use by the decompressor (particularly
-   if the decompressor wants to decompress everything in a single step).
-*/
-
-                        /* constants */
-
-#define Z_NO_FLUSH      0
-#define Z_PARTIAL_FLUSH 1
-#define Z_SYNC_FLUSH    2
-#define Z_FULL_FLUSH    3
-#define Z_FINISH        4
-#define Z_BLOCK         5
-#define Z_TREES         6
-/* Allowed flush values; see deflate() and inflate() below for details */
-
-#define Z_OK            0
-#define Z_STREAM_END    1
-#define Z_NEED_DICT     2
-#define Z_ERRNO        (-1)
-#define Z_STREAM_ERROR (-2)
-#define Z_DATA_ERROR   (-3)
-#define Z_MEM_ERROR    (-4)
-#define Z_BUF_ERROR    (-5)
-#define Z_VERSION_ERROR (-6)
-/* Return codes for the compression/decompression functions. Negative values
- * are errors, positive values are used for special but normal events.
- */
-
-#define Z_NO_COMPRESSION         0
-#define Z_BEST_SPEED             1
-#define Z_BEST_COMPRESSION       9
-#define Z_DEFAULT_COMPRESSION  (-1)
-/* compression levels */
-
-#define Z_FILTERED            1
-#define Z_HUFFMAN_ONLY        2
-#define Z_RLE                 3
-#define Z_FIXED               4
-#define Z_DEFAULT_STRATEGY    0
-/* compression strategy; see deflateInit2() below for details */
-
-#define Z_BINARY   0
-#define Z_TEXT     1
-#define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
-#define Z_UNKNOWN  2
-/* Possible values of the data_type field for deflate() */
-
-#define Z_DEFLATED   8
-/* The deflate compression method (the only one supported in this version) */
-
-#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
-
-#define zlib_version zlibVersion()
-/* for compatibility with versions < 1.0.2 */
-
-
-                        /* basic functions */
-
-ZEXTERN const char * ZEXPORT zlibVersion OF((void));
-/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
-   If the first character differs, the library code actually used is not
-   compatible with the zlib.h header file used by the application.  This check
-   is automatically made by deflateInit and inflateInit.
- */
-
-/*
-ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
-
-     Initializes the internal stream state for compression.  The fields
-   zalloc, zfree and opaque must be initialized before by the caller.  If
-   zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
-   allocation functions.
-
-     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
-   1 gives best speed, 9 gives best compression, 0 gives no compression at all
-   (the input data is simply copied a block at a time).  Z_DEFAULT_COMPRESSION
-   requests a default compromise between speed and compression (currently
-   equivalent to level 6).
-
-     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_STREAM_ERROR if level is not a valid compression level, or
-   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
-   with the version assumed by the caller (ZLIB_VERSION).  msg is set to null
-   if there is no error message.  deflateInit does not perform any compression:
-   this will be done by deflate().
-*/
-
-
-ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
-/*
-    deflate compresses as much data as possible, and stops when the input
-  buffer becomes empty or the output buffer becomes full.  It may introduce
-  some output latency (reading input without producing any output) except when
-  forced to flush.
-
-    The detailed semantics are as follows.  deflate performs one or both of the
-  following actions:
-
-  - Compress more input starting at next_in and update next_in and avail_in
-    accordingly.  If not all input can be processed (because there is not
-    enough room in the output buffer), next_in and avail_in are updated and
-    processing will resume at this point for the next call of deflate().
-
-  - Generate more output starting at next_out and update next_out and avail_out
-    accordingly.  This action is forced if the parameter flush is non zero.
-    Forcing flush frequently degrades the compression ratio, so this parameter
-    should be set only when necessary.  Some output may be provided even if
-    flush is zero.
-
-    Before the call of deflate(), the application should ensure that at least
-  one of the actions is possible, by providing more input and/or consuming more
-  output, and updating avail_in or avail_out accordingly; avail_out should
-  never be zero before the call.  The application can consume the compressed
-  output when it wants, for example when the output buffer is full (avail_out
-  == 0), or after each call of deflate().  If deflate returns Z_OK and with
-  zero avail_out, it must be called again after making room in the output
-  buffer because there might be more output pending. See deflatePending(),
-  which can be used if desired to determine whether or not there is more ouput
-  in that case.
-
-    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
-  decide how much data to accumulate before producing output, in order to
-  maximize compression.
-
-    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
-  flushed to the output buffer and the output is aligned on a byte boundary, so
-  that the decompressor can get all input data available so far.  (In
-  particular avail_in is zero after the call if enough output space has been
-  provided before the call.) Flushing may degrade compression for some
-  compression algorithms and so it should be used only when necessary.  This
-  completes the current deflate block and follows it with an empty stored block
-  that is three bits plus filler bits to the next byte, followed by four bytes
-  (00 00 ff ff).
-
-    If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
-  output buffer, but the output is not aligned to a byte boundary.  All of the
-  input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
-  This completes the current deflate block and follows it with an empty fixed
-  codes block that is 10 bits long.  This assures that enough bytes are output
-  in order for the decompressor to finish the block before the empty fixed
-  codes block.
-
-    If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
-  for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
-  seven bits of the current block are held to be written as the next byte after
-  the next deflate block is completed.  In this case, the decompressor may not
-  be provided enough bits at this point in order to complete decompression of
-  the data provided so far to the compressor.  It may need to wait for the next
-  block to be emitted.  This is for advanced applications that need to control
-  the emission of deflate blocks.
-
-    If flush is set to Z_FULL_FLUSH, all output is flushed as with
-  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
-  restart from this point if previous compressed data has been damaged or if
-  random access is desired.  Using Z_FULL_FLUSH too often can seriously degrade
-  compression.
-
-    If deflate returns with avail_out == 0, this function must be called again
-  with the same value of the flush parameter and more output space (updated
-  avail_out), until the flush is complete (deflate returns with non-zero
-  avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
-  avail_out is greater than six to avoid repeated flush markers due to
-  avail_out == 0 on return.
-
-    If the parameter flush is set to Z_FINISH, pending input is processed,
-  pending output is flushed and deflate returns with Z_STREAM_END if there was
-  enough output space.  If deflate returns with Z_OK or Z_BUF_ERROR, this
-  function must be called again with Z_FINISH and more output space (updated
-  avail_out) but no more input data, until it returns with Z_STREAM_END or an
-  error.  After deflate has returned Z_STREAM_END, the only possible operations
-  on the stream are deflateReset or deflateEnd.
-
-    Z_FINISH can be used in the first deflate call after deflateInit if all the
-  compression is to be done in a single step.  In order to complete in one
-  call, avail_out must be at least the value returned by deflateBound (see
-  below).  Then deflate is guaranteed to return Z_STREAM_END.  If not enough
-  output space is provided, deflate will not return Z_STREAM_END, and it must
-  be called again as described above.
-
-    deflate() sets strm->adler to the Adler-32 checksum of all input read
-  so far (that is, total_in bytes).  If a gzip stream is being generated, then
-  strm->adler will be the CRC-32 checksum of the input read so far.  (See
-  deflateInit2 below.)
-
-    deflate() may update strm->data_type if it can make a good guess about
-  the input data type (Z_BINARY or Z_TEXT).  If in doubt, the data is
-  considered binary.  This field is only for information purposes and does not
-  affect the compression algorithm in any manner.
-
-    deflate() returns Z_OK if some progress has been made (more input
-  processed or more output produced), Z_STREAM_END if all input has been
-  consumed and all output has been produced (only when flush is set to
-  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
-  if next_in or next_out was Z_NULL or the state was inadvertently written over
-  by the application), or Z_BUF_ERROR if no progress is possible (for example
-  avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not fatal, and
-  deflate() can be called again with more input and more output space to
-  continue compressing.
-*/
-
-
-ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
-/*
-     All dynamically allocated data structures for this stream are freed.
-   This function discards any unprocessed input and does not flush any pending
-   output.
-
-     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
-   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
-   prematurely (some input or output was discarded).  In the error case, msg
-   may be set but then points to a static string (which must not be
-   deallocated).
-*/
-
-
-/*
-ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
-
-     Initializes the internal stream state for decompression.  The fields
-   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
-   the caller.  In the current version of inflate, the provided input is not
-   read or consumed.  The allocation of a sliding window will be deferred to
-   the first call of inflate (if the decompression does not complete on the
-   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
-   them to use default allocation functions.
-
-     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
-   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
-   invalid, such as a null pointer to the structure.  msg is set to null if
-   there is no error message.  inflateInit does not perform any decompression.
-   Actual decompression will be done by inflate().  So next_in, and avail_in,
-   next_out, and avail_out are unused and unchanged.  The current
-   implementation of inflateInit() does not process any header information --
-   that is deferred until inflate() is called.
-*/
-
-
-ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
-/*
-    inflate decompresses as much data as possible, and stops when the input
-  buffer becomes empty or the output buffer becomes full.  It may introduce
-  some output latency (reading input without producing any output) except when
-  forced to flush.
-
-  The detailed semantics are as follows.  inflate performs one or both of the
-  following actions:
-
-  - Decompress more input starting at next_in and update next_in and avail_in
-    accordingly.  If not all input can be processed (because there is not
-    enough room in the output buffer), then next_in and avail_in are updated
-    accordingly, and processing will resume at this point for the next call of
-    inflate().
-
-  - Generate more output starting at next_out and update next_out and avail_out
-    accordingly.  inflate() provides as much output as possible, until there is
-    no more input data or no more space in the output buffer (see below about
-    the flush parameter).
-
-    Before the call of inflate(), the application should ensure that at least
-  one of the actions is possible, by providing more input and/or consuming more
-  output, and updating the next_* and avail_* values accordingly.  If the
-  caller of inflate() does not provide both available input and available
-  output space, it is possible that there will be no progress made.  The
-  application can consume the uncompressed output when it wants, for example
-  when the output buffer is full (avail_out == 0), or after each call of
-  inflate().  If inflate returns Z_OK and with zero avail_out, it must be
-  called again after making room in the output buffer because there might be
-  more output pending.
-
-    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
-  Z_BLOCK, or Z_TREES.  Z_SYNC_FLUSH requests that inflate() flush as much
-  output as possible to the output buffer.  Z_BLOCK requests that inflate()
-  stop if and when it gets to the next deflate block boundary.  When decoding
-  the zlib or gzip format, this will cause inflate() to return immediately
-  after the header and before the first block.  When doing a raw inflate,
-  inflate() will go ahead and process the first block, and will return when it
-  gets to the end of that block, or when it runs out of data.
-
-    The Z_BLOCK option assists in appending to or combining deflate streams.
-  To assist in this, on return inflate() always sets strm->data_type to the
-  number of unused bits in the last byte taken from strm->next_in, plus 64 if
-  inflate() is currently decoding the last block in the deflate stream, plus
-  128 if inflate() returned immediately after decoding an end-of-block code or
-  decoding the complete header up to just before the first byte of the deflate
-  stream.  The end-of-block will not be indicated until all of the uncompressed
-  data from that block has been written to strm->next_out.  The number of
-  unused bits may in general be greater than seven, except when bit 7 of
-  data_type is set, in which case the number of unused bits will be less than
-  eight.  data_type is set as noted here every time inflate() returns for all
-  flush options, and so can be used to determine the amount of currently
-  consumed input in bits.
-
-    The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
-  end of each deflate block header is reached, before any actual data in that
-  block is decoded.  This allows the caller to determine the length of the
-  deflate block header for later use in random access within a deflate block.
-  256 is added to the value of strm->data_type when inflate() returns
-  immediately after reaching the end of the deflate block header.
-
-    inflate() should normally be called until it returns Z_STREAM_END or an
-  error.  However if all decompression is to be performed in a single step (a
-  single call of inflate), the parameter flush should be set to Z_FINISH.  In
-  this case all pending input is processed and all pending output is flushed;
-  avail_out must be large enough to hold all of the uncompressed data for the
-  operation to complete.  (The size of the uncompressed data may have been
-  saved by the compressor for this purpose.)  The use of Z_FINISH is not
-  required to perform an inflation in one step.  However it may be used to
-  inform inflate that a faster approach can be used for the single inflate()
-  call.  Z_FINISH also informs inflate to not maintain a sliding window if the
-  stream completes, which reduces inflate's memory footprint.  If the stream
-  does not complete, either because not all of the stream is provided or not
-  enough output space is provided, then a sliding window will be allocated and
-  inflate() can be called again to continue the operation as if Z_NO_FLUSH had
-  been used.
-
-     In this implementation, inflate() always flushes as much output as
-  possible to the output buffer, and always uses the faster approach on the
-  first call.  So the effects of the flush parameter in this implementation are
-  on the return value of inflate() as noted below, when inflate() returns early
-  when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
-  memory for a sliding window when Z_FINISH is used.
-
-     If a preset dictionary is needed after this call (see inflateSetDictionary
-  below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
-  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
-  strm->adler to the Adler-32 checksum of all output produced so far (that is,
-  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
-  below.  At the end of the stream, inflate() checks that its computed Adler-32
-  checksum is equal to that saved by the compressor and returns Z_STREAM_END
-  only if the checksum is correct.
-
-    inflate() can decompress and check either zlib-wrapped or gzip-wrapped
-  deflate data.  The header type is detected automatically, if requested when
-  initializing with inflateInit2().  Any information contained in the gzip
-  header is not retained unless inflateGetHeader() is used.  When processing
-  gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output
-  produced so far.  The CRC-32 is checked against the gzip trailer, as is the
-  uncompressed length, modulo 2^32.
-
-    inflate() returns Z_OK if some progress has been made (more input processed
-  or more output produced), Z_STREAM_END if the end of the compressed data has
-  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
-  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
-  corrupted (input stream not conforming to the zlib format or incorrect check
-  value, in which case strm->msg points to a string with a more specific
-  error), Z_STREAM_ERROR if the stream structure was inconsistent (for example
-  next_in or next_out was Z_NULL, or the state was inadvertently written over
-  by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR
-  if no progress was possible or if there was not enough room in the output
-  buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
-  inflate() can be called again with more input and more output space to
-  continue decompressing.  If Z_DATA_ERROR is returned, the application may
-  then call inflateSync() to look for a good compression block if a partial
-  recovery of the data is to be attempted.
-*/
-
-
-ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
-/*
-     All dynamically allocated data structures for this stream are freed.
-   This function discards any unprocessed input and does not flush any pending
-   output.
-
-     inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state
-   was inconsistent.
-*/
-
-
-                        /* Advanced functions */
-
-/*
-    The following functions are needed only in some special applications.
-*/
-
-/*
-ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
-                                     int  level,
-                                     int  method,
-                                     int  windowBits,
-                                     int  memLevel,
-                                     int  strategy));
-
-     This is another version of deflateInit with more compression options.  The
-   fields next_in, zalloc, zfree and opaque must be initialized before by the
-   caller.
-
-     The method parameter is the compression method.  It must be Z_DEFLATED in
-   this version of the library.
-
-     The windowBits parameter is the base two logarithm of the window size
-   (the size of the history buffer).  It should be in the range 8..15 for this
-   version of the library.  Larger values of this parameter result in better
-   compression at the expense of memory usage.  The default value is 15 if
-   deflateInit is used instead.
-
-     For the current implementation of deflate(), a windowBits value of 8 (a
-   window size of 256 bytes) is not supported.  As a result, a request for 8
-   will result in 9 (a 512-byte window).  In that case, providing 8 to
-   inflateInit2() will result in an error when the zlib header with 9 is
-   checked against the initialization of inflate().  The remedy is to not use 8
-   with deflateInit2() with this initialization, or at least in that case use 9
-   with inflateInit2().
-
-     windowBits can also be -8..-15 for raw deflate.  In this case, -windowBits
-   determines the window size.  deflate() will then generate raw deflate data
-   with no zlib header or trailer, and will not compute a check value.
-
-     windowBits can also be greater than 15 for optional gzip encoding.  Add
-   16 to windowBits to write a simple gzip header and trailer around the
-   compressed data instead of a zlib wrapper.  The gzip header will have no
-   file name, no extra data, no comment, no modification time (set to zero), no
-   header crc, and the operating system will be set to the appropriate value,
-   if the operating system was determined at compile time.  If a gzip stream is
-   being written, strm->adler is a CRC-32 instead of an Adler-32.
-
-     For raw deflate or gzip encoding, a request for a 256-byte window is
-   rejected as invalid, since only the zlib header provides a means of
-   transmitting the window size to the decompressor.
-
-     The memLevel parameter specifies how much memory should be allocated
-   for the internal compression state.  memLevel=1 uses minimum memory but is
-   slow and reduces compression ratio; memLevel=9 uses maximum memory for
-   optimal speed.  The default value is 8.  See zconf.h for total memory usage
-   as a function of windowBits and memLevel.
-
-     The strategy parameter is used to tune the compression algorithm.  Use the
-   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
-   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
-   string match), or Z_RLE to limit match distances to one (run-length
-   encoding).  Filtered data consists mostly of small values with a somewhat
-   random distribution.  In this case, the compression algorithm is tuned to
-   compress them better.  The effect of Z_FILTERED is to force more Huffman
-   coding and less string matching; it is somewhat intermediate between
-   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY.  Z_RLE is designed to be almost as
-   fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data.  The
-   strategy parameter only affects the compression ratio but not the
-   correctness of the compressed output even if it is not set appropriately.
-   Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
-   decoder for special applications.
-
-     deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
-   method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
-   incompatible with the version assumed by the caller (ZLIB_VERSION).  msg is
-   set to null if there is no error message.  deflateInit2 does not perform any
-   compression: this will be done by deflate().
-*/
-
-ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
-/*
-     Initializes the compression dictionary from the given byte sequence
-   without producing any compressed output.  When using the zlib format, this
-   function must be called immediately after deflateInit, deflateInit2 or
-   deflateReset, and before any call of deflate.  When doing raw deflate, this
-   function must be called either before any call of deflate, or immediately
-   after the completion of a deflate block, i.e. after all input has been
-   consumed and all output has been delivered when using any of the flush
-   options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH.  The
-   compressor and decompressor must use exactly the same dictionary (see
-   inflateSetDictionary).
-
-     The dictionary should consist of strings (byte sequences) that are likely
-   to be encountered later in the data to be compressed, with the most commonly
-   used strings preferably put towards the end of the dictionary.  Using a
-   dictionary is most useful when the data to be compressed is short and can be
-   predicted with good accuracy; the data can then be compressed better than
-   with the default empty dictionary.
-
-     Depending on the size of the compression data structures selected by
-   deflateInit or deflateInit2, a part of the dictionary may in effect be
-   discarded, for example if the dictionary is larger than the window size
-   provided in deflateInit or deflateInit2.  Thus the strings most likely to be
-   useful should be put at the end of the dictionary, not at the front.  In
-   addition, the current implementation of deflate will use at most the window
-   size minus 262 bytes of the provided dictionary.
-
-     Upon return of this function, strm->adler is set to the Adler-32 value
-   of the dictionary; the decompressor may later use this value to determine
-   which dictionary has been used by the compressor.  (The Adler-32 value
-   applies to the whole dictionary even if only a subset of the dictionary is
-   actually used by the compressor.) If a raw deflate was requested, then the
-   Adler-32 value is not computed and strm->adler is not set.
-
-     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
-   inconsistent (for example if deflate has already been called for this stream
-   or if not at a block boundary for raw deflate).  deflateSetDictionary does
-   not perform any compression: this will be done by deflate().
-*/
-
-ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
-/*
-     Returns the sliding dictionary being maintained by deflate.  dictLength is
-   set to the number of bytes in the dictionary, and that many bytes are copied
-   to dictionary.  dictionary must have enough space, where 32768 bytes is
-   always enough.  If deflateGetDictionary() is called with dictionary equal to
-   Z_NULL, then only the dictionary length is returned, and nothing is copied.
-   Similary, if dictLength is Z_NULL, then it is not set.
-
-     deflateGetDictionary() may return a length less than the window size, even
-   when more than the window size in input has been provided. It may return up
-   to 258 bytes less in that case, due to how zlib's implementation of deflate
-   manages the sliding window and lookahead for matches, where matches can be
-   up to 258 bytes long. If the application needs the last window-size bytes of
-   input, then that would need to be saved by the application outside of zlib.
-
-     deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
-   stream state is inconsistent.
-*/
-
-ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
-                                    z_streamp source));
-/*
-     Sets the destination stream as a complete copy of the source stream.
-
-     This function can be useful when several compression strategies will be
-   tried, for example when there are several ways of pre-processing the input
-   data with a filter.  The streams that will be discarded should then be freed
-   by calling deflateEnd.  Note that deflateCopy duplicates the internal
-   compression state which can be quite large, so this strategy is slow and can
-   consume lots of memory.
-
-     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
-   destination.
-*/
-
-ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
-/*
-     This function is equivalent to deflateEnd followed by deflateInit, but
-   does not free and reallocate the internal compression state.  The stream
-   will leave the compression level and any other attributes that may have been
-   set unchanged.
-
-     deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent (such as zalloc or state being Z_NULL).
-*/
-
-ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
-                                      int level,
-                                      int strategy));
-/*
-     Dynamically update the compression level and compression strategy.  The
-   interpretation of level and strategy is as in deflateInit2().  This can be
-   used to switch between compression and straight copy of the input data, or
-   to switch to a different kind of input data requiring a different strategy.
-   If the compression approach (which is a function of the level) or the
-   strategy is changed, and if any input has been consumed in a previous
-   deflate() call, then the input available so far is compressed with the old
-   level and strategy using deflate(strm, Z_BLOCK).  There are three approaches
-   for the compression levels 0, 1..3, and 4..9 respectively.  The new level
-   and strategy will take effect at the next call of deflate().
-
-     If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
-   not have enough output space to complete, then the parameter change will not
-   take effect.  In this case, deflateParams() can be called again with the
-   same parameters and more output space to try again.
-
-     In order to assure a change in the parameters on the first try, the
-   deflate stream should be flushed using deflate() with Z_BLOCK or other flush
-   request until strm.avail_out is not zero, before calling deflateParams().
-   Then no more input data should be provided before the deflateParams() call.
-   If this is done, the old level and strategy will be applied to the data
-   compressed before deflateParams(), and the new level and strategy will be
-   applied to the the data compressed after deflateParams().
-
-     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
-   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
-   there was not enough output space to complete the compression of the
-   available input data before a change in the strategy or approach.  Note that
-   in the case of a Z_BUF_ERROR, the parameters are not changed.  A return
-   value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be
-   retried with more output space.
-*/
-
-ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
-                                    int good_length,
-                                    int max_lazy,
-                                    int nice_length,
-                                    int max_chain));
-/*
-     Fine tune deflate's internal compression parameters.  This should only be
-   used by someone who understands the algorithm used by zlib's deflate for
-   searching for the best matching string, and even then only by the most
-   fanatic optimizer trying to squeeze out the last compressed bit for their
-   specific input data.  Read the deflate.c source code for the meaning of the
-   max_lazy, good_length, nice_length, and max_chain parameters.
-
-     deflateTune() can be called after deflateInit() or deflateInit2(), and
-   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
- */
-
-ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
-                                       uLong sourceLen));
-/*
-     deflateBound() returns an upper bound on the compressed size after
-   deflation of sourceLen bytes.  It must be called after deflateInit() or
-   deflateInit2(), and after deflateSetHeader(), if used.  This would be used
-   to allocate an output buffer for deflation in a single pass, and so would be
-   called before deflate().  If that first deflate() call is provided the
-   sourceLen input bytes, an output buffer allocated to the size returned by
-   deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
-   to return Z_STREAM_END.  Note that it is possible for the compressed size to
-   be larger than the value returned by deflateBound() if flush options other
-   than Z_FINISH or Z_NO_FLUSH are used.
-*/
-
-ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
-                                       unsigned *pending,
-                                       int *bits));
-/*
-     deflatePending() returns the number of bytes and bits of output that have
-   been generated, but not yet provided in the available output.  The bytes not
-   provided would be due to the available output space having being consumed.
-   The number of bits of output not provided are between 0 and 7, where they
-   await more bits to join them in order to fill out a full byte.  If pending
-   or bits are Z_NULL, then those values are not set.
-
-     deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent.
- */
-
-ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
-/*
-     deflatePrime() inserts bits in the deflate output stream.  The intent
-   is that this function is used to start off the deflate output with the bits
-   leftover from a previous deflate stream when appending to it.  As such, this
-   function can only be used for raw deflate, and must be used before the first
-   deflate() call after a deflateInit2() or deflateReset().  bits must be less
-   than or equal to 16, and that many of the least significant bits of value
-   will be inserted in the output.
-
-     deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
-   room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
-   source stream state was inconsistent.
-*/
-
-ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
-                                         gz_headerp head));
-/*
-     deflateSetHeader() provides gzip header information for when a gzip
-   stream is requested by deflateInit2().  deflateSetHeader() may be called
-   after deflateInit2() or deflateReset() and before the first call of
-   deflate().  The text, time, os, extra field, name, and comment information
-   in the provided gz_header structure are written to the gzip header (xflag is
-   ignored -- the extra flags are set according to the compression level).  The
-   caller must assure that, if not Z_NULL, name and comment are terminated with
-   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
-   available there.  If hcrc is true, a gzip header crc is included.  Note that
-   the current versions of the command-line version of gzip (up through version
-   1.3.x) do not support header crc's, and will report that it is a "multi-part
-   gzip file" and give up.
-
-     If deflateSetHeader is not used, the default gzip header has text false,
-   the time set to zero, and os set to 255, with no extra, name, or comment
-   fields.  The gzip header is returned to the default state by deflateReset().
-
-     deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent.
-*/
-
-/*
-ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
-                                     int  windowBits));
-
-     This is another version of inflateInit with an extra parameter.  The
-   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
-   before by the caller.
-
-     The windowBits parameter is the base two logarithm of the maximum window
-   size (the size of the history buffer).  It should be in the range 8..15 for
-   this version of the library.  The default value is 15 if inflateInit is used
-   instead.  windowBits must be greater than or equal to the windowBits value
-   provided to deflateInit2() while compressing, or it must be equal to 15 if
-   deflateInit2() was not used.  If a compressed stream with a larger window
-   size is given as input, inflate() will return with the error code
-   Z_DATA_ERROR instead of trying to allocate a larger window.
-
-     windowBits can also be zero to request that inflate use the window size in
-   the zlib header of the compressed stream.
-
-     windowBits can also be -8..-15 for raw inflate.  In this case, -windowBits
-   determines the window size.  inflate() will then process raw deflate data,
-   not looking for a zlib or gzip header, not generating a check value, and not
-   looking for any check values for comparison at the end of the stream.  This
-   is for use with other formats that use the deflate compressed data format
-   such as zip.  Those formats provide their own check values.  If a custom
-   format is developed using the raw deflate format for compressed data, it is
-   recommended that a check value such as an Adler-32 or a CRC-32 be applied to
-   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
-   most applications, the zlib format should be used as is.  Note that comments
-   above on the use in deflateInit2() applies to the magnitude of windowBits.
-
-     windowBits can also be greater than 15 for optional gzip decoding.  Add
-   32 to windowBits to enable zlib and gzip decoding with automatic header
-   detection, or add 16 to decode only the gzip format (the zlib format will
-   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is a
-   CRC-32 instead of an Adler-32.  Unlike the gunzip utility and gzread() (see
-   below), inflate() will not automatically decode concatenated gzip streams.
-   inflate() will return Z_STREAM_END at the end of the gzip stream.  The state
-   would need to be reset to continue decoding a subsequent gzip stream.
-
-     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
-   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
-   invalid, such as a null pointer to the structure.  msg is set to null if
-   there is no error message.  inflateInit2 does not perform any decompression
-   apart from possibly reading the zlib header if present: actual decompression
-   will be done by inflate().  (So next_in and avail_in may be modified, but
-   next_out and avail_out are unused and unchanged.) The current implementation
-   of inflateInit2() does not process any header information -- that is
-   deferred until inflate() is called.
-*/
-
-ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
-/*
-     Initializes the decompression dictionary from the given uncompressed byte
-   sequence.  This function must be called immediately after a call of inflate,
-   if that call returned Z_NEED_DICT.  The dictionary chosen by the compressor
-   can be determined from the Adler-32 value returned by that call of inflate.
-   The compressor and decompressor must use exactly the same dictionary (see
-   deflateSetDictionary).  For raw inflate, this function can be called at any
-   time to set the dictionary.  If the provided dictionary is smaller than the
-   window and there is already data in the window, then the provided dictionary
-   will amend what's there.  The application must insure that the dictionary
-   that was used for compression is provided.
-
-     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
-   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-   expected one (incorrect Adler-32 value).  inflateSetDictionary does not
-   perform any decompression: this will be done by subsequent calls of
-   inflate().
-*/
-
-ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
-/*
-     Returns the sliding dictionary being maintained by inflate.  dictLength is
-   set to the number of bytes in the dictionary, and that many bytes are copied
-   to dictionary.  dictionary must have enough space, where 32768 bytes is
-   always enough.  If inflateGetDictionary() is called with dictionary equal to
-   Z_NULL, then only the dictionary length is returned, and nothing is copied.
-   Similary, if dictLength is Z_NULL, then it is not set.
-
-     inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
-   stream state is inconsistent.
-*/
-
-ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
-/*
-     Skips invalid compressed data until a possible full flush point (see above
-   for the description of deflate with Z_FULL_FLUSH) can be found, or until all
-   available input is skipped.  No output is provided.
-
-     inflateSync searches for a 00 00 FF FF pattern in the compressed data.
-   All full flush points have this pattern, but not all occurrences of this
-   pattern are full flush points.
-
-     inflateSync returns Z_OK if a possible full flush point has been found,
-   Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
-   has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
-   In the success case, the application may save the current current value of
-   total_in which indicates where valid compressed data was found.  In the
-   error case, the application may repeatedly call inflateSync, providing more
-   input each time, until success or end of the input data.
-*/
-
-ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
-                                    z_streamp source));
-/*
-     Sets the destination stream as a complete copy of the source stream.
-
-     This function can be useful when randomly accessing a large stream.  The
-   first pass through the stream can periodically record the inflate state,
-   allowing restarting inflate at those points when randomly accessing the
-   stream.
-
-     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
-   destination.
-*/
-
-ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
-/*
-     This function is equivalent to inflateEnd followed by inflateInit,
-   but does not free and reallocate the internal decompression state.  The
-   stream will keep attributes that may have been set by inflateInit2.
-
-     inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent (such as zalloc or state being Z_NULL).
-*/
-
-ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
-                                      int windowBits));
-/*
-     This function is the same as inflateReset, but it also permits changing
-   the wrap and window size requests.  The windowBits parameter is interpreted
-   the same as it is for inflateInit2.  If the window size is changed, then the
-   memory allocated for the window is freed, and the window will be reallocated
-   by inflate() if needed.
-
-     inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent (such as zalloc or state being Z_NULL), or if
-   the windowBits parameter is invalid.
-*/
-
-ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
-/*
-     This function inserts bits in the inflate input stream.  The intent is
-   that this function is used to start inflating at a bit position in the
-   middle of a byte.  The provided bits will be used before any bytes are used
-   from next_in.  This function should only be used with raw inflate, and
-   should be used before the first inflate() call after inflateInit2() or
-   inflateReset().  bits must be less than or equal to 16, and that many of the
-   least significant bits of value will be inserted in the input.
-
-     If bits is negative, then the input stream bit buffer is emptied.  Then
-   inflatePrime() can be called again to put bits in the buffer.  This is used
-   to clear out bits leftover after feeding inflate a block description prior
-   to feeding inflate codes.
-
-     inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent.
-*/
-
-ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
-/*
-     This function returns two values, one in the lower 16 bits of the return
-   value, and the other in the remaining upper bits, obtained by shifting the
-   return value down 16 bits.  If the upper value is -1 and the lower value is
-   zero, then inflate() is currently decoding information outside of a block.
-   If the upper value is -1 and the lower value is non-zero, then inflate is in
-   the middle of a stored block, with the lower value equaling the number of
-   bytes from the input remaining to copy.  If the upper value is not -1, then
-   it is the number of bits back from the current bit position in the input of
-   the code (literal or length/distance pair) currently being processed.  In
-   that case the lower value is the number of bytes already emitted for that
-   code.
-
-     A code is being processed if inflate is waiting for more input to complete
-   decoding of the code, or if it has completed decoding but is waiting for
-   more output space to write the literal or match data.
-
-     inflateMark() is used to mark locations in the input data for random
-   access, which may be at bit positions, and to note those cases where the
-   output of a code may span boundaries of random access blocks.  The current
-   location in the input stream can be determined from avail_in and data_type
-   as noted in the description for the Z_BLOCK flush parameter for inflate.
-
-     inflateMark returns the value noted above, or -65536 if the provided
-   source stream state was inconsistent.
-*/
-
-ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
-                                         gz_headerp head));
-/*
-     inflateGetHeader() requests that gzip header information be stored in the
-   provided gz_header structure.  inflateGetHeader() may be called after
-   inflateInit2() or inflateReset(), and before the first call of inflate().
-   As inflate() processes the gzip stream, head->done is zero until the header
-   is completed, at which time head->done is set to one.  If a zlib stream is
-   being decoded, then head->done is set to -1 to indicate that there will be
-   no gzip header information forthcoming.  Note that Z_BLOCK or Z_TREES can be
-   used to force inflate() to return immediately after header processing is
-   complete and before any actual data is decompressed.
-
-     The text, time, xflags, and os fields are filled in with the gzip header
-   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
-   was valid if done is set to one.) If extra is not Z_NULL, then extra_max
-   contains the maximum number of bytes to write to extra.  Once done is true,
-   extra_len contains the actual extra field length, and extra contains the
-   extra field, or that field truncated if extra_max is less than extra_len.
-   If name is not Z_NULL, then up to name_max characters are written there,
-   terminated with a zero unless the length is greater than name_max.  If
-   comment is not Z_NULL, then up to comm_max characters are written there,
-   terminated with a zero unless the length is greater than comm_max.  When any
-   of extra, name, or comment are not Z_NULL and the respective field is not
-   present in the header, then that field is set to Z_NULL to signal its
-   absence.  This allows the use of deflateSetHeader() with the returned
-   structure to duplicate the header.  However if those fields are set to
-   allocated memory, then the application will need to save those pointers
-   elsewhere so that they can be eventually freed.
-
-     If inflateGetHeader is not used, then the header information is simply
-   discarded.  The header is always checked for validity, including the header
-   CRC if present.  inflateReset() will reset the process to discard the header
-   information.  The application would need to call inflateGetHeader() again to
-   retrieve the header from the next gzip stream.
-
-     inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
-   stream state was inconsistent.
-*/
-
-/*
-ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
-                                        unsigned char FAR *window));
-
-     Initialize the internal stream state for decompression using inflateBack()
-   calls.  The fields zalloc, zfree and opaque in strm must be initialized
-   before the call.  If zalloc and zfree are Z_NULL, then the default library-
-   derived memory allocation routines are used.  windowBits is the base two
-   logarithm of the window size, in the range 8..15.  window is a caller
-   supplied buffer of that size.  Except for special applications where it is
-   assured that deflate was used with small window sizes, windowBits must be 15
-   and a 32K byte window must be supplied to be able to decompress general
-   deflate streams.
-
-     See inflateBack() for the usage of these routines.
-
-     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
-   the parameters are invalid, Z_MEM_ERROR if the internal state could not be
-   allocated, or Z_VERSION_ERROR if the version of the library does not match
-   the version of the header file.
-*/
-
-typedef unsigned (*in_func) OF((void FAR *,
-                                z_const unsigned char FAR * FAR *));
-typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
-
-ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
-                                    in_func in, void FAR *in_desc,
-                                    out_func out, void FAR *out_desc));
-/*
-     inflateBack() does a raw inflate with a single call using a call-back
-   interface for input and output.  This is potentially more efficient than
-   inflate() for file i/o applications, in that it avoids copying between the
-   output and the sliding window by simply making the window itself the output
-   buffer.  inflate() can be faster on modern CPUs when used with large
-   buffers.  inflateBack() trusts the application to not change the output
-   buffer passed by the output function, at least until inflateBack() returns.
-
-     inflateBackInit() must be called first to allocate the internal state
-   and to initialize the state with the user-provided window buffer.
-   inflateBack() may then be used multiple times to inflate a complete, raw
-   deflate stream with each call.  inflateBackEnd() is then called to free the
-   allocated state.
-
-     A raw deflate stream is one with no zlib or gzip header or trailer.
-   This routine would normally be used in a utility that reads zip or gzip
-   files and writes out uncompressed files.  The utility would decode the
-   header and process the trailer on its own, hence this routine expects only
-   the raw deflate stream to decompress.  This is different from the default
-   behavior of inflate(), which expects a zlib header and trailer around the
-   deflate stream.
-
-     inflateBack() uses two subroutines supplied by the caller that are then
-   called by inflateBack() for input and output.  inflateBack() calls those
-   routines until it reads a complete deflate stream and writes out all of the
-   uncompressed data, or until it encounters an error.  The function's
-   parameters and return types are defined above in the in_func and out_func
-   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
-   number of bytes of provided input, and a pointer to that input in buf.  If
-   there is no input available, in() must return zero -- buf is ignored in that
-   case -- and inflateBack() will return a buffer error.  inflateBack() will
-   call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].
-   out() should return zero on success, or non-zero on failure.  If out()
-   returns non-zero, inflateBack() will return with an error.  Neither in() nor
-   out() are permitted to change the contents of the window provided to
-   inflateBackInit(), which is also the buffer that out() uses to write from.
-   The length written by out() will be at most the window size.  Any non-zero
-   amount of input may be provided by in().
-
-     For convenience, inflateBack() can be provided input on the first call by
-   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
-   in() will be called.  Therefore strm->next_in must be initialized before
-   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
-   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
-   must also be initialized, and then if strm->avail_in is not zero, input will
-   initially be taken from strm->next_in[0 ..  strm->avail_in - 1].
-
-     The in_desc and out_desc parameters of inflateBack() is passed as the
-   first parameter of in() and out() respectively when they are called.  These
-   descriptors can be optionally used to pass any information that the caller-
-   supplied in() and out() functions need to do their job.
-
-     On return, inflateBack() will set strm->next_in and strm->avail_in to
-   pass back any unused input that was provided by the last in() call.  The
-   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
-   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
-   in the deflate stream (in which case strm->msg is set to indicate the nature
-   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
-   In the case of Z_BUF_ERROR, an input or output error can be distinguished
-   using strm->next_in which will be Z_NULL only if in() returned an error.  If
-   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
-   non-zero.  (in() will always be called before out(), so strm->next_in is
-   assured to be defined if out() returns non-zero.)  Note that inflateBack()
-   cannot return Z_OK.
-*/
-
-ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
-/*
-     All memory allocated by inflateBackInit() is freed.
-
-     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
-   state was inconsistent.
-*/
-
-ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
-/* Return flags indicating compile-time options.
-
-    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
-     1.0: size of uInt
-     3.2: size of uLong
-     5.4: size of voidpf (pointer)
-     7.6: size of z_off_t
-
-    Compiler, assembler, and debug options:
-     8: ZLIB_DEBUG
-     9: ASMV or ASMINF -- use ASM code
-     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
-     11: 0 (reserved)
-
-    One-time table building (smaller code, but not thread-safe if true):
-     12: BUILDFIXED -- build static block decoding tables when needed
-     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
-     14,15: 0 (reserved)
-
-    Library content (indicates missing functionality):
-     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
-                          deflate code when not needed)
-     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
-                    and decode gzip streams (to avoid linking crc code)
-     18-19: 0 (reserved)
-
-    Operation variations (changes in library functionality):
-     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
-     21: FASTEST -- deflate algorithm with only one, lowest compression level
-     22,23: 0 (reserved)
-
-    The sprintf variant used by gzprintf (zero is best):
-     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
-     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
-     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
-
-    Remainder:
-     27-31: 0 (reserved)
- */
-
-#ifndef Z_SOLO
-
-                        /* utility functions */
-
-/*
-     The following utility functions are implemented on top of the basic
-   stream-oriented functions.  To simplify the interface, some default options
-   are assumed (compression level and memory usage, standard memory allocation
-   functions).  The source code of these utility functions can be modified if
-   you need special options.
-*/
-
-ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
-                                 const Bytef *source, uLong sourceLen));
-/*
-     Compresses the source buffer into the destination buffer.  sourceLen is
-   the byte length of the source buffer.  Upon entry, destLen is the total size
-   of the destination buffer, which must be at least the value returned by
-   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
-   compressed data.  compress() is equivalent to compress2() with a level
-   parameter of Z_DEFAULT_COMPRESSION.
-
-     compress returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_BUF_ERROR if there was not enough room in the output
-   buffer.
-*/
-
-ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
-                                  const Bytef *source, uLong sourceLen,
-                                  int level));
-/*
-     Compresses the source buffer into the destination buffer.  The level
-   parameter has the same meaning as in deflateInit.  sourceLen is the byte
-   length of the source buffer.  Upon entry, destLen is the total size of the
-   destination buffer, which must be at least the value returned by
-   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
-   compressed data.
-
-     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
-   Z_STREAM_ERROR if the level parameter is invalid.
-*/
-
-ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
-/*
-     compressBound() returns an upper bound on the compressed size after
-   compress() or compress2() on sourceLen bytes.  It would be used before a
-   compress() or compress2() call to allocate the destination buffer.
-*/
-
-ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
-                                   const Bytef *source, uLong sourceLen));
-/*
-     Decompresses the source buffer into the destination buffer.  sourceLen is
-   the byte length of the source buffer.  Upon entry, destLen is the total size
-   of the destination buffer, which must be large enough to hold the entire
-   uncompressed data.  (The size of the uncompressed data must have been saved
-   previously by the compressor and transmitted to the decompressor by some
-   mechanism outside the scope of this compression library.) Upon exit, destLen
-   is the actual size of the uncompressed data.
-
-     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_BUF_ERROR if there was not enough room in the output
-   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.  In
-   the case where there is not enough room, uncompress() will fill the output
-   buffer with the uncompressed data up to that point.
-*/
-
-ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
-                                    const Bytef *source, uLong *sourceLen));
-/*
-     Same as uncompress, except that sourceLen is a pointer, where the
-   length of the source is *sourceLen.  On return, *sourceLen is the number of
-   source bytes consumed.
-*/
-
-                        /* gzip file access functions */
-
-/*
-     This library supports reading and writing files in gzip (.gz) format with
-   an interface similar to that of stdio, using the functions that start with
-   "gz".  The gzip format is different from the zlib format.  gzip is a gzip
-   wrapper, documented in RFC 1952, wrapped around a deflate stream.
-*/
-
-typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
-
-/*
-ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
-
-     Opens a gzip (.gz) file for reading or writing.  The mode parameter is as
-   in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
-   a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
-   compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
-   for fixed code compression as in "wb9F".  (See the description of
-   deflateInit2 for more information about the strategy parameter.)  'T' will
-   request transparent writing or appending with no compression and not using
-   the gzip format.
-
-     "a" can be used instead of "w" to request that the gzip stream that will
-   be written be appended to the file.  "+" will result in an error, since
-   reading and writing to the same gzip file is not supported.  The addition of
-   "x" when writing will create the file exclusively, which fails if the file
-   already exists.  On systems that support it, the addition of "e" when
-   reading or writing will set the flag to close the file on an execve() call.
-
-     These functions, as well as gzip, will read and decode a sequence of gzip
-   streams in a file.  The append function of gzopen() can be used to create
-   such a file.  (Also see gzflush() for another way to do this.)  When
-   appending, gzopen does not test whether the file begins with a gzip stream,
-   nor does it look for the end of the gzip streams to begin appending.  gzopen
-   will simply append a gzip stream to the existing file.
-
-     gzopen can be used to read a file which is not in gzip format; in this
-   case gzread will directly read from the file without decompression.  When
-   reading, this will be detected automatically by looking for the magic two-
-   byte gzip header.
-
-     gzopen returns NULL if the file could not be opened, if there was
-   insufficient memory to allocate the gzFile state, or if an invalid mode was
-   specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
-   errno can be checked to determine if the reason gzopen failed was that the
-   file could not be opened.
-*/
-
-ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
-/*
-     gzdopen associates a gzFile with the file descriptor fd.  File descriptors
-   are obtained from calls like open, dup, creat, pipe or fileno (if the file
-   has been previously opened with fopen).  The mode parameter is as in gzopen.
-
-     The next call of gzclose on the returned gzFile will also close the file
-   descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
-   fd.  If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
-   mode);.  The duplicated descriptor should be saved to avoid a leak, since
-   gzdopen does not close fd if it fails.  If you are using fileno() to get the
-   file descriptor from a FILE *, then you will have to use dup() to avoid
-   double-close()ing the file descriptor.  Both gzclose() and fclose() will
-   close the associated file descriptor, so they need to have different file
-   descriptors.
-
-     gzdopen returns NULL if there was insufficient memory to allocate the
-   gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
-   provided, or '+' was provided), or if fd is -1.  The file descriptor is not
-   used until the next gz* read, write, seek, or close operation, so gzdopen
-   will not detect if fd is invalid (unless fd is -1).
-*/
-
-ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
-/*
-     Set the internal buffer size used by this library's functions.  The
-   default buffer size is 8192 bytes.  This function must be called after
-   gzopen() or gzdopen(), and before any other calls that read or write the
-   file.  The buffer memory allocation is always deferred to the first read or
-   write.  Three times that size in buffer space is allocated.  A larger buffer
-   size of, for example, 64K or 128K bytes will noticeably increase the speed
-   of decompression (reading).
-
-     The new buffer size also affects the maximum length for gzprintf().
-
-     gzbuffer() returns 0 on success, or -1 on failure, such as being called
-   too late.
-*/
-
-ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
-/*
-     Dynamically update the compression level or strategy.  See the description
-   of deflateInit2 for the meaning of these parameters.  Previously provided
-   data is flushed before the parameter change.
-
-     gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
-   opened for writing, Z_ERRNO if there is an error writing the flushed data,
-   or Z_MEM_ERROR if there is a memory allocation error.
-*/
-
-ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
-/*
-     Reads the given number of uncompressed bytes from the compressed file.  If
-   the input file is not in gzip format, gzread copies the given number of
-   bytes into the buffer directly from the file.
-
-     After reaching the end of a gzip stream in the input, gzread will continue
-   to read, looking for another gzip stream.  Any number of gzip streams may be
-   concatenated in the input file, and will all be decompressed by gzread().
-   If something other than a gzip stream is encountered after a gzip stream,
-   that remaining trailing garbage is ignored (and no error is returned).
-
-     gzread can be used to read a gzip file that is being concurrently written.
-   Upon reaching the end of the input, gzread will return with the available
-   data.  If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
-   gzclearerr can be used to clear the end of file indicator in order to permit
-   gzread to be tried again.  Z_OK indicates that a gzip stream was completed
-   on the last gzread.  Z_BUF_ERROR indicates that the input file ended in the
-   middle of a gzip stream.  Note that gzread does not return -1 in the event
-   of an incomplete gzip stream.  This error is deferred until gzclose(), which
-   will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
-   stream.  Alternatively, gzerror can be used before gzclose to detect this
-   case.
-
-     gzread returns the number of uncompressed bytes actually read, less than
-   len for end of file, or -1 for error.  If len is too large to fit in an int,
-   then nothing is read, -1 is returned, and the error state is set to
-   Z_STREAM_ERROR.
-*/
-
-ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
-                                     gzFile file));
-/*
-     Read up to nitems items of size size from file to buf, otherwise operating
-   as gzread() does.  This duplicates the interface of stdio's fread(), with
-   size_t request and return types.  If the library defines size_t, then
-   z_size_t is identical to size_t.  If not, then z_size_t is an unsigned
-   integer type that can contain a pointer.
-
-     gzfread() returns the number of full items read of size size, or zero if
-   the end of the file was reached and a full item could not be read, or if
-   there was an error.  gzerror() must be consulted if zero is returned in
-   order to determine if there was an error.  If the multiplication of size and
-   nitems overflows, i.e. the product does not fit in a z_size_t, then nothing
-   is read, zero is returned, and the error state is set to Z_STREAM_ERROR.
-
-     In the event that the end of file is reached and only a partial item is
-   available at the end, i.e. the remaining uncompressed data length is not a
-   multiple of size, then the final partial item is nevetheless read into buf
-   and the end-of-file flag is set.  The length of the partial item read is not
-   provided, but could be inferred from the result of gztell().  This behavior
-   is the same as the behavior of fread() implementations in common libraries,
-   but it prevents the direct use of gzfread() to read a concurrently written
-   file, reseting and retrying on end-of-file, when size is not 1.
-*/
-
-ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
-                                voidpc buf, unsigned len));
-/*
-     Writes the given number of uncompressed bytes into the compressed file.
-   gzwrite returns the number of uncompressed bytes written or 0 in case of
-   error.
-*/
-
-ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
-                                      z_size_t nitems, gzFile file));
-/*
-     gzfwrite() writes nitems items of size size from buf to file, duplicating
-   the interface of stdio's fwrite(), with size_t request and return types.  If
-   the library defines size_t, then z_size_t is identical to size_t.  If not,
-   then z_size_t is an unsigned integer type that can contain a pointer.
-
-     gzfwrite() returns the number of full items written of size size, or zero
-   if there was an error.  If the multiplication of size and nitems overflows,
-   i.e. the product does not fit in a z_size_t, then nothing is written, zero
-   is returned, and the error state is set to Z_STREAM_ERROR.
-*/
-
-ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
-/*
-     Converts, formats, and writes the arguments to the compressed file under
-   control of the format string, as in fprintf.  gzprintf returns the number of
-   uncompressed bytes actually written, or a negative zlib error code in case
-   of error.  The number of uncompressed bytes written is limited to 8191, or
-   one less than the buffer size given to gzbuffer().  The caller should assure
-   that this limit is not exceeded.  If it is exceeded, then gzprintf() will
-   return an error (0) with nothing written.  In this case, there may also be a
-   buffer overflow with unpredictable consequences, which is possible only if
-   zlib was compiled with the insecure functions sprintf() or vsprintf()
-   because the secure snprintf() or vsnprintf() functions were not available.
-   This can be determined using zlibCompileFlags().
-*/
-
-ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
-/*
-     Writes the given null-terminated string to the compressed file, excluding
-   the terminating null character.
-
-     gzputs returns the number of characters written, or -1 in case of error.
-*/
-
-ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
-/*
-     Reads bytes from the compressed file until len-1 characters are read, or a
-   newline character is read and transferred to buf, or an end-of-file
-   condition is encountered.  If any characters are read or if len == 1, the
-   string is terminated with a null character.  If no characters are read due
-   to an end-of-file or len < 1, then the buffer is left untouched.
-
-     gzgets returns buf which is a null-terminated string, or it returns NULL
-   for end-of-file or in case of error.  If there was an error, the contents at
-   buf are indeterminate.
-*/
-
-ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
-/*
-     Writes c, converted to an unsigned char, into the compressed file.  gzputc
-   returns the value that was written, or -1 in case of error.
-*/
-
-ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
-/*
-     Reads one byte from the compressed file.  gzgetc returns this byte or -1
-   in case of end of file or error.  This is implemented as a macro for speed.
-   As such, it does not do all of the checking the other functions do.  I.e.
-   it does not check to see if file is NULL, nor whether the structure file
-   points to has been clobbered or not.
-*/
-
-ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
-/*
-     Push one character back onto the stream to be read as the first character
-   on the next read.  At least one character of push-back is allowed.
-   gzungetc() returns the character pushed, or -1 on failure.  gzungetc() will
-   fail if c is -1, and may fail if a character has been pushed but not read
-   yet.  If gzungetc is used immediately after gzopen or gzdopen, at least the
-   output buffer size of pushed characters is allowed.  (See gzbuffer above.)
-   The pushed character will be discarded if the stream is repositioned with
-   gzseek() or gzrewind().
-*/
-
-ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
-/*
-     Flushes all pending output into the compressed file.  The parameter flush
-   is as in the deflate() function.  The return value is the zlib error number
-   (see function gzerror below).  gzflush is only permitted when writing.
-
-     If the flush parameter is Z_FINISH, the remaining data is written and the
-   gzip stream is completed in the output.  If gzwrite() is called again, a new
-   gzip stream will be started in the output.  gzread() is able to read such
-   concatenated gzip streams.
-
-     gzflush should be called only when strictly necessary because it will
-   degrade compression if called too often.
-*/
-
-/*
-ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
-                                   z_off_t offset, int whence));
-
-     Sets the starting position for the next gzread or gzwrite on the given
-   compressed file.  The offset represents a number of bytes in the
-   uncompressed data stream.  The whence parameter is defined as in lseek(2);
-   the value SEEK_END is not supported.
-
-     If the file is opened for reading, this function is emulated but can be
-   extremely slow.  If the file is opened for writing, only forward seeks are
-   supported; gzseek then compresses a sequence of zeroes up to the new
-   starting position.
-
-     gzseek returns the resulting offset location as measured in bytes from
-   the beginning of the uncompressed stream, or -1 in case of error, in
-   particular if the file is opened for writing and the new starting position
-   would be before the current position.
-*/
-
-ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
-/*
-     Rewinds the given file. This function is supported only for reading.
-
-     gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
-*/
-
-/*
-ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
-
-     Returns the starting position for the next gzread or gzwrite on the given
-   compressed file.  This position represents a number of bytes in the
-   uncompressed data stream, and is zero when starting, even if appending or
-   reading a gzip stream from the middle of a file using gzdopen().
-
-     gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
-*/
-
-/*
-ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
-
-     Returns the current offset in the file being read or written.  This offset
-   includes the count of bytes that precede the gzip stream, for example when
-   appending or when using gzdopen() for reading.  When reading, the offset
-   does not include as yet unused buffered input.  This information can be used
-   for a progress indicator.  On error, gzoffset() returns -1.
-*/
-
-ZEXTERN int ZEXPORT gzeof OF((gzFile file));
-/*
-     Returns true (1) if the end-of-file indicator has been set while reading,
-   false (0) otherwise.  Note that the end-of-file indicator is set only if the
-   read tried to go past the end of the input, but came up short.  Therefore,
-   just like feof(), gzeof() may return false even if there is no more data to
-   read, in the event that the last read request was for the exact number of
-   bytes remaining in the input file.  This will happen if the input file size
-   is an exact multiple of the buffer size.
-
-     If gzeof() returns true, then the read functions will return no more data,
-   unless the end-of-file indicator is reset by gzclearerr() and the input file
-   has grown since the previous end of file was detected.
-*/
-
-ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
-/*
-     Returns true (1) if file is being copied directly while reading, or false
-   (0) if file is a gzip stream being decompressed.
-
-     If the input file is empty, gzdirect() will return true, since the input
-   does not contain a gzip stream.
-
-     If gzdirect() is used immediately after gzopen() or gzdopen() it will
-   cause buffers to be allocated to allow reading the file to determine if it
-   is a gzip file.  Therefore if gzbuffer() is used, it should be called before
-   gzdirect().
-
-     When writing, gzdirect() returns true (1) if transparent writing was
-   requested ("wT" for the gzopen() mode), or false (0) otherwise.  (Note:
-   gzdirect() is not needed when writing.  Transparent writing must be
-   explicitly requested, so the application already knows the answer.  When
-   linking statically, using gzdirect() will include all of the zlib code for
-   gzip file reading and decompression, which may not be desired.)
-*/
-
-ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
-/*
-     Flushes all pending output if necessary, closes the compressed file and
-   deallocates the (de)compression state.  Note that once file is closed, you
-   cannot call gzerror with file, since its structures have been deallocated.
-   gzclose must not be called more than once on the same file, just as free
-   must not be called more than once on the same allocation.
-
-     gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
-   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
-   last read ended in the middle of a gzip stream, or Z_OK on success.
-*/
-
-ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
-ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
-/*
-     Same as gzclose(), but gzclose_r() is only for use when reading, and
-   gzclose_w() is only for use when writing or appending.  The advantage to
-   using these instead of gzclose() is that they avoid linking in zlib
-   compression or decompression code that is not used when only reading or only
-   writing respectively.  If gzclose() is used, then both compression and
-   decompression code will be included the application when linking to a static
-   zlib library.
-*/
-
-ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
-/*
-     Returns the error message for the last error which occurred on the given
-   compressed file.  errnum is set to zlib error number.  If an error occurred
-   in the file system and not in the compression library, errnum is set to
-   Z_ERRNO and the application may consult errno to get the exact error code.
-
-     The application must not modify the returned string.  Future calls to
-   this function may invalidate the previously returned string.  If file is
-   closed, then the string previously returned by gzerror will no longer be
-   available.
-
-     gzerror() should be used to distinguish errors from end-of-file for those
-   functions above that do not distinguish those cases in their return values.
-*/
-
-ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
-/*
-     Clears the error and end-of-file flags for file.  This is analogous to the
-   clearerr() function in stdio.  This is useful for continuing to read a gzip
-   file that is being written concurrently.
-*/
-
-#endif /* !Z_SOLO */
-
-                        /* checksum functions */
-
-/*
-     These functions are not related to compression but are exported
-   anyway because they might be useful in applications using the compression
-   library.
-*/
-
-ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
-/*
-     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
-   return the updated checksum.  If buf is Z_NULL, this function returns the
-   required initial value for the checksum.
-
-     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
-   much faster.
-
-   Usage example:
-
-     uLong adler = adler32(0L, Z_NULL, 0);
-
-     while (read_buffer(buffer, length) != EOF) {
-       adler = adler32(adler, buffer, length);
-     }
-     if (adler != original_adler) error();
-*/
-
-ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
-                                    z_size_t len));
-/*
-     Same as adler32(), but with a size_t length.
-*/
-
-/*
-ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
-                                          z_off_t len2));
-
-     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
-   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
-   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
-   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.  Note
-   that the z_off_t type (like off_t) is a signed integer.  If len2 is
-   negative, the result has no meaning or utility.
-*/
-
-ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
-/*
-     Update a running CRC-32 with the bytes buf[0..len-1] and return the
-   updated CRC-32.  If buf is Z_NULL, this function returns the required
-   initial value for the crc.  Pre- and post-conditioning (one's complement) is
-   performed within this function so it shouldn't be done by the application.
-
-   Usage example:
-
-     uLong crc = crc32(0L, Z_NULL, 0);
-
-     while (read_buffer(buffer, length) != EOF) {
-       crc = crc32(crc, buffer, length);
-     }
-     if (crc != original_crc) error();
-*/
-
-ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf,
-                                  z_size_t len));
-/*
-     Same as crc32(), but with a size_t length.
-*/
-
-/*
-ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
-
-     Combine two CRC-32 check values into one.  For two sequences of bytes,
-   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
-   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
-   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
-   len2.
-*/
-
-
-                        /* various hacks, don't look :) */
-
-/* deflateInit and inflateInit are macros to allow checking the zlib version
- * and the compiler's view of z_stream:
- */
-ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
-                                      int windowBits, int memLevel,
-                                      int strategy, const char *version,
-                                      int stream_size));
-ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
-                                      const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
-                                         unsigned char FAR *window,
-                                         const char *version,
-                                         int stream_size));
-#ifdef Z_PREFIX_SET
-#  define z_deflateInit(strm, level) \
-          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define z_inflateInit(strm) \
-          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
-          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
-                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define z_inflateInit2(strm, windowBits) \
-          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
-                        (int)sizeof(z_stream))
-#  define z_inflateBackInit(strm, windowBits, window) \
-          inflateBackInit_((strm), (windowBits), (window), \
-                           ZLIB_VERSION, (int)sizeof(z_stream))
-#else
-#  define deflateInit(strm, level) \
-          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define inflateInit(strm) \
-          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
-          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
-                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
-#  define inflateInit2(strm, windowBits) \
-          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
-                        (int)sizeof(z_stream))
-#  define inflateBackInit(strm, windowBits, window) \
-          inflateBackInit_((strm), (windowBits), (window), \
-                           ZLIB_VERSION, (int)sizeof(z_stream))
-#endif
-
-#ifndef Z_SOLO
-
-/* gzgetc() macro and its supporting function and exposed data structure.  Note
- * that the real internal state is much larger than the exposed structure.
- * This abbreviated structure exposes just enough for the gzgetc() macro.  The
- * user should not mess with these exposed elements, since their names or
- * behavior could change in the future, perhaps even capriciously.  They can
- * only be used by the gzgetc() macro.  You have been warned.
- */
-struct gzFile_s {
-    unsigned have;
-    unsigned char *next;
-    z_off64_t pos;
-};
-ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
-#ifdef Z_PREFIX_SET
-#  undef z_gzgetc
-#  define z_gzgetc(g) \
-          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
-#else
-#  define gzgetc(g) \
-          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
-#endif
-
-/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
- * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
- * both are true, the application gets the *64 functions, and the regular
- * functions are changed to 64 bits) -- in case these are set on systems
- * without large file support, _LFS64_LARGEFILE must also be true
- */
-#ifdef Z_LARGE64
-   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
-   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
-#endif
-
-#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
-#  ifdef Z_PREFIX_SET
-#    define z_gzopen z_gzopen64
-#    define z_gzseek z_gzseek64
-#    define z_gztell z_gztell64
-#    define z_gzoffset z_gzoffset64
-#    define z_adler32_combine z_adler32_combine64
-#    define z_crc32_combine z_crc32_combine64
-#  else
-#    define gzopen gzopen64
-#    define gzseek gzseek64
-#    define gztell gztell64
-#    define gzoffset gzoffset64
-#    define adler32_combine adler32_combine64
-#    define crc32_combine crc32_combine64
-#  endif
-#  ifndef Z_LARGE64
-     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
-     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
-     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
-     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-#  endif
-#else
-   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
-   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
-   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
-   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-#endif
-
-#else /* Z_SOLO */
-
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-
-#endif /* !Z_SOLO */
-
-/* undocumented functions */
-ZEXTERN const char   * ZEXPORT zError           OF((int));
-ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
-ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
-ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
-ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
-ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF ((z_streamp));
-ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
-ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
-#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO)
-ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
-                                            const char *mode));
-#endif
-#if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#  ifndef Z_SOLO
-ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
-                                                  const char *format,
-                                                  va_list va));
-#  endif
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ZLIB_H */
diff --git a/dep/zlib/src/adler32.c b/dep/zlib/src/adler32.c
deleted file mode 100644
index d0be4380a..000000000
--- a/dep/zlib/src/adler32.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* adler32.c -- compute the Adler-32 checksum of a data stream
- * Copyright (C) 1995-2011, 2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#include "zutil.h"
-
-local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
-
-#define BASE 65521U     /* largest prime smaller than 65536 */
-#define NMAX 5552
-/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-
-#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
-#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
-#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
-#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
-#define DO16(buf)   DO8(buf,0); DO8(buf,8);
-
-/* use NO_DIVIDE if your processor does not do division in hardware --
-   try it both ways to see which is faster */
-#ifdef NO_DIVIDE
-/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
-   (thank you to John Reiser for pointing this out) */
-#  define CHOP(a) \
-    do { \
-        unsigned long tmp = a >> 16; \
-        a &= 0xffffUL; \
-        a += (tmp << 4) - tmp; \
-    } while (0)
-#  define MOD28(a) \
-    do { \
-        CHOP(a); \
-        if (a >= BASE) a -= BASE; \
-    } while (0)
-#  define MOD(a) \
-    do { \
-        CHOP(a); \
-        MOD28(a); \
-    } while (0)
-#  define MOD63(a) \
-    do { /* this assumes a is not negative */ \
-        z_off64_t tmp = a >> 32; \
-        a &= 0xffffffffL; \
-        a += (tmp << 8) - (tmp << 5) + tmp; \
-        tmp = a >> 16; \
-        a &= 0xffffL; \
-        a += (tmp << 4) - tmp; \
-        tmp = a >> 16; \
-        a &= 0xffffL; \
-        a += (tmp << 4) - tmp; \
-        if (a >= BASE) a -= BASE; \
-    } while (0)
-#else
-#  define MOD(a) a %= BASE
-#  define MOD28(a) a %= BASE
-#  define MOD63(a) a %= BASE
-#endif
-
-/* ========================================================================= */
-uLong ZEXPORT adler32_z(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    z_size_t len;
-{
-    unsigned long sum2;
-    unsigned n;
-
-    /* split Adler-32 into component sums */
-    sum2 = (adler >> 16) & 0xffff;
-    adler &= 0xffff;
-
-    /* in case user likes doing a byte at a time, keep it fast */
-    if (len == 1) {
-        adler += buf[0];
-        if (adler >= BASE)
-            adler -= BASE;
-        sum2 += adler;
-        if (sum2 >= BASE)
-            sum2 -= BASE;
-        return adler | (sum2 << 16);
-    }
-
-    /* initial Adler-32 value (deferred check for len == 1 speed) */
-    if (buf == Z_NULL)
-        return 1L;
-
-    /* in case short lengths are provided, keep it somewhat fast */
-    if (len < 16) {
-        while (len--) {
-            adler += *buf++;
-            sum2 += adler;
-        }
-        if (adler >= BASE)
-            adler -= BASE;
-        MOD28(sum2);            /* only added so many BASE's */
-        return adler | (sum2 << 16);
-    }
-
-    /* do length NMAX blocks -- requires just one modulo operation */
-    while (len >= NMAX) {
-        len -= NMAX;
-        n = NMAX / 16;          /* NMAX is divisible by 16 */
-        do {
-            DO16(buf);          /* 16 sums unrolled */
-            buf += 16;
-        } while (--n);
-        MOD(adler);
-        MOD(sum2);
-    }
-
-    /* do remaining bytes (less than NMAX, still just one modulo) */
-    if (len) {                  /* avoid modulos if none remaining */
-        while (len >= 16) {
-            len -= 16;
-            DO16(buf);
-            buf += 16;
-        }
-        while (len--) {
-            adler += *buf++;
-            sum2 += adler;
-        }
-        MOD(adler);
-        MOD(sum2);
-    }
-
-    /* return recombined sums */
-    return adler | (sum2 << 16);
-}
-
-/* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    uInt len;
-{
-    return adler32_z(adler, buf, len);
-}
-
-/* ========================================================================= */
-local uLong adler32_combine_(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
-    unsigned long sum1;
-    unsigned long sum2;
-    unsigned rem;
-
-    /* for negative len, return invalid adler32 as a clue for debugging */
-    if (len2 < 0)
-        return 0xffffffffUL;
-
-    /* the derivation of this formula is left as an exercise for the reader */
-    MOD63(len2);                /* assumes len2 >= 0 */
-    rem = (unsigned)len2;
-    sum1 = adler1 & 0xffff;
-    sum2 = rem * sum1;
-    MOD(sum2);
-    sum1 += (adler2 & 0xffff) + BASE - 1;
-    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
-    if (sum1 >= BASE) sum1 -= BASE;
-    if (sum1 >= BASE) sum1 -= BASE;
-    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
-    if (sum2 >= BASE) sum2 -= BASE;
-    return sum1 | (sum2 << 16);
-}
-
-/* ========================================================================= */
-uLong ZEXPORT adler32_combine(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off_t len2;
-{
-    return adler32_combine_(adler1, adler2, len2);
-}
-
-uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
-    return adler32_combine_(adler1, adler2, len2);
-}
diff --git a/dep/zlib/src/compress.c b/dep/zlib/src/compress.c
deleted file mode 100644
index e2db404ab..000000000
--- a/dep/zlib/src/compress.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* compress.c -- compress a memory buffer
- * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#define ZLIB_INTERNAL
-#include "zlib.h"
-
-/* ===========================================================================
-     Compresses the source buffer into the destination buffer. The level
-   parameter has the same meaning as in deflateInit.  sourceLen is the byte
-   length of the source buffer. Upon entry, destLen is the total size of the
-   destination buffer, which must be at least 0.1% larger than sourceLen plus
-   12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
-
-     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
-   Z_STREAM_ERROR if the level parameter is invalid.
-*/
-int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-    int level;
-{
-    z_stream stream;
-    int err;
-    const uInt max = (uInt)-1;
-    uLong left;
-
-    left = *destLen;
-    *destLen = 0;
-
-    stream.zalloc = (alloc_func)0;
-    stream.zfree = (free_func)0;
-    stream.opaque = (voidpf)0;
-
-    err = deflateInit(&stream, level);
-    if (err != Z_OK) return err;
-
-    stream.next_out = dest;
-    stream.avail_out = 0;
-    stream.next_in = (z_const Bytef *)source;
-    stream.avail_in = 0;
-
-    do {
-        if (stream.avail_out == 0) {
-            stream.avail_out = left > (uLong)max ? max : (uInt)left;
-            left -= stream.avail_out;
-        }
-        if (stream.avail_in == 0) {
-            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
-            sourceLen -= stream.avail_in;
-        }
-        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
-    } while (err == Z_OK);
-
-    *destLen = stream.total_out;
-    deflateEnd(&stream);
-    return err == Z_STREAM_END ? Z_OK : err;
-}
-
-/* ===========================================================================
- */
-int ZEXPORT compress (dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
-    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
-}
-
-/* ===========================================================================
-     If the default memLevel or windowBits for deflateInit() is changed, then
-   this function needs to be updated.
- */
-uLong ZEXPORT compressBound (sourceLen)
-    uLong sourceLen;
-{
-    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
-           (sourceLen >> 25) + 13;
-}
diff --git a/dep/zlib/src/crc32.c b/dep/zlib/src/crc32.c
deleted file mode 100644
index 9580440c0..000000000
--- a/dep/zlib/src/crc32.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
- * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
- * tables for updating the shift register in one step with three exclusive-ors
- * instead of four steps with four exclusive-ors.  This results in about a
- * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
- */
-
-/* @(#) $Id$ */
-
-/*
-  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
-  protection on the static variables used to control the first-use generation
-  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
-  first call get_crc_table() to initialize the tables before allowing more than
-  one thread to use crc32().
-
-  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
- */
-
-#ifdef MAKECRCH
-#  include <stdio.h>
-#  ifndef DYNAMIC_CRC_TABLE
-#    define DYNAMIC_CRC_TABLE
-#  endif /* !DYNAMIC_CRC_TABLE */
-#endif /* MAKECRCH */
-
-#include "zutil.h"      /* for STDC and FAR definitions */
-
-/* Definitions for doing the crc four data bytes at a time. */
-#if !defined(NOBYFOUR) && defined(Z_U4)
-#  define BYFOUR
-#endif
-#ifdef BYFOUR
-   local unsigned long crc32_little OF((unsigned long,
-                        const unsigned char FAR *, z_size_t));
-   local unsigned long crc32_big OF((unsigned long,
-                        const unsigned char FAR *, z_size_t));
-#  define TBLS 8
-#else
-#  define TBLS 1
-#endif /* BYFOUR */
-
-/* Local functions for crc concatenation */
-local unsigned long gf2_matrix_times OF((unsigned long *mat,
-                                         unsigned long vec));
-local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
-local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
-
-
-#ifdef DYNAMIC_CRC_TABLE
-
-local volatile int crc_table_empty = 1;
-local z_crc_t FAR crc_table[TBLS][256];
-local void make_crc_table OF((void));
-#ifdef MAKECRCH
-   local void write_table OF((FILE *, const z_crc_t FAR *));
-#endif /* MAKECRCH */
-/*
-  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
-  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
-
-  Polynomials over GF(2) are represented in binary, one bit per coefficient,
-  with the lowest powers in the most significant bit.  Then adding polynomials
-  is just exclusive-or, and multiplying a polynomial by x is a right shift by
-  one.  If we call the above polynomial p, and represent a byte as the
-  polynomial q, also with the lowest power in the most significant bit (so the
-  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
-  where a mod b means the remainder after dividing a by b.
-
-  This calculation is done using the shift-register method of multiplying and
-  taking the remainder.  The register is initialized to zero, and for each
-  incoming bit, x^32 is added mod p to the register if the bit is a one (where
-  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
-  x (which is shifting right by one and adding x^32 mod p if the bit shifted
-  out is a one).  We start with the highest power (least significant bit) of
-  q and repeat for all eight bits of q.
-
-  The first table is simply the CRC of all possible eight bit values.  This is
-  all the information needed to generate CRCs on data a byte at a time for all
-  combinations of CRC register values and incoming bytes.  The remaining tables
-  allow for word-at-a-time CRC calculation for both big-endian and little-
-  endian machines, where a word is four bytes.
-*/
-local void make_crc_table()
-{
-    z_crc_t c;
-    int n, k;
-    z_crc_t poly;                       /* polynomial exclusive-or pattern */
-    /* terms of polynomial defining this crc (except x^32): */
-    static volatile int first = 1;      /* flag to limit concurrent making */
-    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
-
-    /* See if another task is already doing this (not thread-safe, but better
-       than nothing -- significantly reduces duration of vulnerability in
-       case the advice about DYNAMIC_CRC_TABLE is ignored) */
-    if (first) {
-        first = 0;
-
-        /* make exclusive-or pattern from polynomial (0xedb88320UL) */
-        poly = 0;
-        for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
-            poly |= (z_crc_t)1 << (31 - p[n]);
-
-        /* generate a crc for every 8-bit value */
-        for (n = 0; n < 256; n++) {
-            c = (z_crc_t)n;
-            for (k = 0; k < 8; k++)
-                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
-            crc_table[0][n] = c;
-        }
-
-#ifdef BYFOUR
-        /* generate crc for each value followed by one, two, and three zeros,
-           and then the byte reversal of those as well as the first table */
-        for (n = 0; n < 256; n++) {
-            c = crc_table[0][n];
-            crc_table[4][n] = ZSWAP32(c);
-            for (k = 1; k < 4; k++) {
-                c = crc_table[0][c & 0xff] ^ (c >> 8);
-                crc_table[k][n] = c;
-                crc_table[k + 4][n] = ZSWAP32(c);
-            }
-        }
-#endif /* BYFOUR */
-
-        crc_table_empty = 0;
-    }
-    else {      /* not first */
-        /* wait for the other guy to finish (not efficient, but rare) */
-        while (crc_table_empty)
-            ;
-    }
-
-#ifdef MAKECRCH
-    /* write out CRC tables to crc32.h */
-    {
-        FILE *out;
-
-        out = fopen("crc32.h", "w");
-        if (out == NULL) return;
-        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
-        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
-        fprintf(out, "local const z_crc_t FAR ");
-        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
-        write_table(out, crc_table[0]);
-#  ifdef BYFOUR
-        fprintf(out, "#ifdef BYFOUR\n");
-        for (k = 1; k < 8; k++) {
-            fprintf(out, "  },\n  {\n");
-            write_table(out, crc_table[k]);
-        }
-        fprintf(out, "#endif\n");
-#  endif /* BYFOUR */
-        fprintf(out, "  }\n};\n");
-        fclose(out);
-    }
-#endif /* MAKECRCH */
-}
-
-#ifdef MAKECRCH
-local void write_table(out, table)
-    FILE *out;
-    const z_crc_t FAR *table;
-{
-    int n;
-
-    for (n = 0; n < 256; n++)
-        fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : "    ",
-                (unsigned long)(table[n]),
-                n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
-}
-#endif /* MAKECRCH */
-
-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables of CRC-32s of all single-byte values, made by make_crc_table().
- */
-#include "crc32.h"
-#endif /* DYNAMIC_CRC_TABLE */
-
-/* =========================================================================
- * This function can be used by asm versions of crc32()
- */
-const z_crc_t FAR * ZEXPORT get_crc_table()
-{
-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
-    return (const z_crc_t FAR *)crc_table;
-}
-
-/* ========================================================================= */
-#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
-#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
-
-/* ========================================================================= */
-unsigned long ZEXPORT crc32_z(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
-    if (buf == Z_NULL) return 0UL;
-
-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
-
-#ifdef BYFOUR
-    if (sizeof(void *) == sizeof(ptrdiff_t)) {
-        z_crc_t endian;
-
-        endian = 1;
-        if (*((unsigned char *)(&endian)))
-            return crc32_little(crc, buf, len);
-        else
-            return crc32_big(crc, buf, len);
-    }
-#endif /* BYFOUR */
-    crc = crc ^ 0xffffffffUL;
-    while (len >= 8) {
-        DO8;
-        len -= 8;
-    }
-    if (len) do {
-        DO1;
-    } while (--len);
-    return crc ^ 0xffffffffUL;
-}
-
-/* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    uInt len;
-{
-    return crc32_z(crc, buf, len);
-}
-
-#ifdef BYFOUR
-
-/*
-   This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
-   integer pointer type. This violates the strict aliasing rule, where a
-   compiler can assume, for optimization purposes, that two pointers to
-   fundamentally different types won't ever point to the same memory. This can
-   manifest as a problem only if one of the pointers is written to. This code
-   only reads from those pointers. So long as this code remains isolated in
-   this compilation unit, there won't be a problem. For this reason, this code
-   should not be copied and pasted into a compilation unit in which other code
-   writes to the buffer that is passed to these routines.
- */
-
-/* ========================================================================= */
-#define DOLIT4 c ^= *buf4++; \
-        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
-            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
-#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
-
-/* ========================================================================= */
-local unsigned long crc32_little(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
-    register z_crc_t c;
-    register const z_crc_t FAR *buf4;
-
-    c = (z_crc_t)crc;
-    c = ~c;
-    while (len && ((ptrdiff_t)buf & 3)) {
-        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
-        len--;
-    }
-
-    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
-    while (len >= 32) {
-        DOLIT32;
-        len -= 32;
-    }
-    while (len >= 4) {
-        DOLIT4;
-        len -= 4;
-    }
-    buf = (const unsigned char FAR *)buf4;
-
-    if (len) do {
-        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
-    } while (--len);
-    c = ~c;
-    return (unsigned long)c;
-}
-
-/* ========================================================================= */
-#define DOBIG4 c ^= *buf4++; \
-        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
-            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
-#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
-
-/* ========================================================================= */
-local unsigned long crc32_big(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
-    register z_crc_t c;
-    register const z_crc_t FAR *buf4;
-
-    c = ZSWAP32((z_crc_t)crc);
-    c = ~c;
-    while (len && ((ptrdiff_t)buf & 3)) {
-        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
-        len--;
-    }
-
-    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
-    while (len >= 32) {
-        DOBIG32;
-        len -= 32;
-    }
-    while (len >= 4) {
-        DOBIG4;
-        len -= 4;
-    }
-    buf = (const unsigned char FAR *)buf4;
-
-    if (len) do {
-        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
-    } while (--len);
-    c = ~c;
-    return (unsigned long)(ZSWAP32(c));
-}
-
-#endif /* BYFOUR */
-
-#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
-
-/* ========================================================================= */
-local unsigned long gf2_matrix_times(mat, vec)
-    unsigned long *mat;
-    unsigned long vec;
-{
-    unsigned long sum;
-
-    sum = 0;
-    while (vec) {
-        if (vec & 1)
-            sum ^= *mat;
-        vec >>= 1;
-        mat++;
-    }
-    return sum;
-}
-
-/* ========================================================================= */
-local void gf2_matrix_square(square, mat)
-    unsigned long *square;
-    unsigned long *mat;
-{
-    int n;
-
-    for (n = 0; n < GF2_DIM; n++)
-        square[n] = gf2_matrix_times(mat, mat[n]);
-}
-
-/* ========================================================================= */
-local uLong crc32_combine_(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off64_t len2;
-{
-    int n;
-    unsigned long row;
-    unsigned long even[GF2_DIM];    /* even-power-of-two zeros operator */
-    unsigned long odd[GF2_DIM];     /* odd-power-of-two zeros operator */
-
-    /* degenerate case (also disallow negative lengths) */
-    if (len2 <= 0)
-        return crc1;
-
-    /* put operator for one zero bit in odd */
-    odd[0] = 0xedb88320UL;          /* CRC-32 polynomial */
-    row = 1;
-    for (n = 1; n < GF2_DIM; n++) {
-        odd[n] = row;
-        row <<= 1;
-    }
-
-    /* put operator for two zero bits in even */
-    gf2_matrix_square(even, odd);
-
-    /* put operator for four zero bits in odd */
-    gf2_matrix_square(odd, even);
-
-    /* apply len2 zeros to crc1 (first square will put the operator for one
-       zero byte, eight zero bits, in even) */
-    do {
-        /* apply zeros operator for this bit of len2 */
-        gf2_matrix_square(even, odd);
-        if (len2 & 1)
-            crc1 = gf2_matrix_times(even, crc1);
-        len2 >>= 1;
-
-        /* if no more bits set, then done */
-        if (len2 == 0)
-            break;
-
-        /* another iteration of the loop with odd and even swapped */
-        gf2_matrix_square(odd, even);
-        if (len2 & 1)
-            crc1 = gf2_matrix_times(odd, crc1);
-        len2 >>= 1;
-
-        /* if no more bits set, then done */
-    } while (len2 != 0);
-
-    /* return combined crc */
-    crc1 ^= crc2;
-    return crc1;
-}
-
-/* ========================================================================= */
-uLong ZEXPORT crc32_combine(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off_t len2;
-{
-    return crc32_combine_(crc1, crc2, len2);
-}
-
-uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off64_t len2;
-{
-    return crc32_combine_(crc1, crc2, len2);
-}
diff --git a/dep/zlib/src/crc32.h b/dep/zlib/src/crc32.h
deleted file mode 100644
index 9e0c77810..000000000
--- a/dep/zlib/src/crc32.h
+++ /dev/null
@@ -1,441 +0,0 @@
-/* crc32.h -- tables for rapid CRC calculation
- * Generated automatically by crc32.c
- */
-
-local const z_crc_t FAR crc_table[TBLS][256] =
-{
-  {
-    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
-    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
-    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
-    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
-    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
-    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
-    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
-    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
-    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
-    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
-    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
-    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
-    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
-    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
-    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
-    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
-    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
-    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
-    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
-    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
-    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
-    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
-    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
-    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
-    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
-    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
-    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
-    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
-    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
-    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
-    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
-    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
-    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
-    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
-    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
-    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
-    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
-    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
-    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
-    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
-    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
-    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
-    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
-    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
-    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
-    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
-    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
-    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
-    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
-    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
-    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
-    0x2d02ef8dUL
-#ifdef BYFOUR
-  },
-  {
-    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
-    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
-    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
-    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
-    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
-    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
-    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
-    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
-    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
-    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
-    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
-    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
-    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
-    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
-    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
-    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
-    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
-    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
-    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
-    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
-    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
-    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
-    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
-    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
-    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
-    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
-    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
-    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
-    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
-    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
-    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
-    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
-    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
-    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
-    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
-    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
-    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
-    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
-    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
-    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
-    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
-    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
-    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
-    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
-    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
-    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
-    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
-    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
-    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
-    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
-    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
-    0x9324fd72UL
-  },
-  {
-    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
-    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
-    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
-    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
-    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
-    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
-    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
-    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
-    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
-    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
-    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
-    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
-    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
-    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
-    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
-    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
-    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
-    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
-    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
-    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
-    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
-    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
-    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
-    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
-    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
-    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
-    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
-    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
-    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
-    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
-    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
-    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
-    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
-    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
-    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
-    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
-    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
-    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
-    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
-    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
-    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
-    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
-    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
-    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
-    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
-    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
-    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
-    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
-    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
-    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
-    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
-    0xbe9834edUL
-  },
-  {
-    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
-    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
-    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
-    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
-    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
-    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
-    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
-    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
-    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
-    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
-    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
-    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
-    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
-    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
-    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
-    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
-    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
-    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
-    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
-    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
-    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
-    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
-    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
-    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
-    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
-    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
-    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
-    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
-    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
-    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
-    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
-    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
-    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
-    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
-    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
-    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
-    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
-    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
-    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
-    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
-    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
-    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
-    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
-    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
-    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
-    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
-    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
-    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
-    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
-    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
-    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
-    0xde0506f1UL
-  },
-  {
-    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
-    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
-    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
-    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
-    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
-    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
-    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
-    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
-    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
-    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
-    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
-    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
-    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
-    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
-    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
-    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
-    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
-    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
-    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
-    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
-    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
-    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
-    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
-    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
-    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
-    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
-    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
-    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
-    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
-    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
-    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
-    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
-    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
-    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
-    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
-    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
-    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
-    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
-    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
-    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
-    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
-    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
-    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
-    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
-    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
-    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
-    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
-    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
-    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
-    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
-    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
-    0x8def022dUL
-  },
-  {
-    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
-    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
-    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
-    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
-    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
-    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
-    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
-    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
-    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
-    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
-    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
-    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
-    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
-    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
-    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
-    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
-    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
-    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
-    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
-    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
-    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
-    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
-    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
-    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
-    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
-    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
-    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
-    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
-    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
-    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
-    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
-    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
-    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
-    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
-    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
-    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
-    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
-    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
-    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
-    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
-    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
-    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
-    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
-    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
-    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
-    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
-    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
-    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
-    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
-    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
-    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
-    0x72fd2493UL
-  },
-  {
-    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
-    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
-    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
-    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
-    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
-    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
-    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
-    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
-    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
-    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
-    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
-    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
-    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
-    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
-    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
-    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
-    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
-    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
-    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
-    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
-    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
-    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
-    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
-    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
-    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
-    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
-    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
-    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
-    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
-    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
-    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
-    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
-    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
-    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
-    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
-    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
-    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
-    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
-    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
-    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
-    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
-    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
-    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
-    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
-    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
-    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
-    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
-    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
-    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
-    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
-    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
-    0xed3498beUL
-  },
-  {
-    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
-    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
-    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
-    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
-    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
-    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
-    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
-    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
-    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
-    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
-    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
-    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
-    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
-    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
-    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
-    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
-    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
-    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
-    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
-    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
-    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
-    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
-    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
-    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
-    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
-    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
-    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
-    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
-    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
-    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
-    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
-    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
-    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
-    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
-    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
-    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
-    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
-    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
-    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
-    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
-    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
-    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
-    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
-    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
-    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
-    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
-    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
-    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
-    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
-    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
-    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
-    0xf10605deUL
-#endif
-  }
-};
diff --git a/dep/zlib/src/deflate.c b/dep/zlib/src/deflate.c
deleted file mode 100644
index 1ec761448..000000000
--- a/dep/zlib/src/deflate.c
+++ /dev/null
@@ -1,2163 +0,0 @@
-/* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- *  ALGORITHM
- *
- *      The "deflation" process depends on being able to identify portions
- *      of the input text which are identical to earlier input (within a
- *      sliding window trailing behind the input currently being processed).
- *
- *      The most straightforward technique turns out to be the fastest for
- *      most input files: try all possible matches and select the longest.
- *      The key feature of this algorithm is that insertions into the string
- *      dictionary are very simple and thus fast, and deletions are avoided
- *      completely. Insertions are performed at each input character, whereas
- *      string matches are performed only when the previous match ends. So it
- *      is preferable to spend more time in matches to allow very fast string
- *      insertions and avoid deletions. The matching algorithm for small
- *      strings is inspired from that of Rabin & Karp. A brute force approach
- *      is used to find longer strings when a small match has been found.
- *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
- *      (by Leonid Broukhis).
- *         A previous version of this file used a more sophisticated algorithm
- *      (by Fiala and Greene) which is guaranteed to run in linear amortized
- *      time, but has a larger average cost, uses more memory and is patented.
- *      However the F&G algorithm may be faster for some highly redundant
- *      files if the parameter max_chain_length (described below) is too large.
- *
- *  ACKNOWLEDGEMENTS
- *
- *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
- *      I found it in 'freeze' written by Leonid Broukhis.
- *      Thanks to many people for bug reports and testing.
- *
- *  REFERENCES
- *
- *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
- *      Available in http://tools.ietf.org/html/rfc1951
- *
- *      A description of the Rabin and Karp algorithm is given in the book
- *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
- *
- *      Fiala,E.R., and Greene,D.H.
- *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
- *
- */
-
-/* @(#) $Id$ */
-
-#include "deflate.h"
-
-const char deflate_copyright[] =
-   " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
-/*
-  If you use the zlib library in a product, an acknowledgment is welcome
-  in the documentation of your product. If for some reason you cannot
-  include such an acknowledgment, I would appreciate that you keep this
-  copyright string in the executable of your product.
- */
-
-/* ===========================================================================
- *  Function prototypes.
- */
-typedef enum {
-    need_more,      /* block not completed, need more input or more output */
-    block_done,     /* block flush performed */
-    finish_started, /* finish started, need only more output at next deflate */
-    finish_done     /* finish done, accept no more input or output */
-} block_state;
-
-typedef block_state (*compress_func) OF((deflate_state *s, int flush));
-/* Compression function. Returns the block state after the call. */
-
-local int deflateStateCheck      OF((z_streamp strm));
-local void slide_hash     OF((deflate_state *s));
-local void fill_window    OF((deflate_state *s));
-local block_state deflate_stored OF((deflate_state *s, int flush));
-local block_state deflate_fast   OF((deflate_state *s, int flush));
-#ifndef FASTEST
-local block_state deflate_slow   OF((deflate_state *s, int flush));
-#endif
-local block_state deflate_rle    OF((deflate_state *s, int flush));
-local block_state deflate_huff   OF((deflate_state *s, int flush));
-local void lm_init        OF((deflate_state *s));
-local void putShortMSB    OF((deflate_state *s, uInt b));
-local void flush_pending  OF((z_streamp strm));
-local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
-#ifdef ASMV
-#  pragma message("Assembler code may have bugs -- use at your own risk")
-      void match_init OF((void)); /* asm code initialization */
-      uInt longest_match  OF((deflate_state *s, IPos cur_match));
-#else
-local uInt longest_match  OF((deflate_state *s, IPos cur_match));
-#endif
-
-#ifdef ZLIB_DEBUG
-local  void check_match OF((deflate_state *s, IPos start, IPos match,
-                            int length));
-#endif
-
-/* ===========================================================================
- * Local data
- */
-
-#define NIL 0
-/* Tail of hash chains */
-
-#ifndef TOO_FAR
-#  define TOO_FAR 4096
-#endif
-/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-
-/* Values for max_lazy_match, good_match and max_chain_length, depending on
- * the desired pack level (0..9). The values given below have been tuned to
- * exclude worst case performance for pathological files. Better values may be
- * found for specific files.
- */
-typedef struct config_s {
-   ush good_length; /* reduce lazy search above this match length */
-   ush max_lazy;    /* do not perform lazy search above this match length */
-   ush nice_length; /* quit search above this match length */
-   ush max_chain;
-   compress_func func;
-} config;
-
-#ifdef FASTEST
-local const config configuration_table[2] = {
-/*      good lazy nice chain */
-/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
-/* 1 */ {4,    4,  8,    4, deflate_fast}}; /* max speed, no lazy matches */
-#else
-local const config configuration_table[10] = {
-/*      good lazy nice chain */
-/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
-/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
-/* 2 */ {4,    5, 16,    8, deflate_fast},
-/* 3 */ {4,    6, 32,   32, deflate_fast},
-
-/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
-/* 5 */ {8,   16, 32,   32, deflate_slow},
-/* 6 */ {8,   16, 128, 128, deflate_slow},
-/* 7 */ {8,   32, 128, 256, deflate_slow},
-/* 8 */ {32, 128, 258, 1024, deflate_slow},
-/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
-#endif
-
-/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
- * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
- * meaning.
- */
-
-/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
-#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
-
-/* ===========================================================================
- * Update a hash value with the given input byte
- * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
- *    characters, so that a running hash key can be computed from the previous
- *    key instead of complete recalculation each time.
- */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
-
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * If this file is compiled with -DFASTEST, the compression level is forced
- * to 1, and no hash chains are maintained.
- * IN  assertion: all calls to INSERT_STRING are made with consecutive input
- *    characters and the first MIN_MATCH bytes of str are valid (except for
- *    the last MIN_MATCH-1 bytes of the input file).
- */
-#ifdef FASTEST
-#define INSERT_STRING(s, str, match_head) \
-   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
-    match_head = s->head[s->ins_h], \
-    s->head[s->ins_h] = (Pos)(str))
-#else
-#define INSERT_STRING(s, str, match_head) \
-   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
-    match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
-    s->head[s->ins_h] = (Pos)(str))
-#endif
-
-/* ===========================================================================
- * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
- * prev[] will be initialized on the fly.
- */
-#define CLEAR_HASH(s) \
-    s->head[s->hash_size-1] = NIL; \
-    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
-
-/* ===========================================================================
- * Slide the hash table when sliding the window down (could be avoided with 32
- * bit values at the expense of memory usage). We slide even when level == 0 to
- * keep the hash table consistent if we switch back to level > 0 later.
- */
-local void slide_hash(s)
-    deflate_state *s;
-{
-    unsigned n, m;
-    Posf *p;
-    uInt wsize = s->w_size;
-
-    n = s->hash_size;
-    p = &s->head[n];
-    do {
-        m = *--p;
-        *p = (Pos)(m >= wsize ? m - wsize : NIL);
-    } while (--n);
-    n = wsize;
-#ifndef FASTEST
-    p = &s->prev[n];
-    do {
-        m = *--p;
-        *p = (Pos)(m >= wsize ? m - wsize : NIL);
-        /* If n is not on any hash chain, prev[n] is garbage but
-         * its value will never be used.
-         */
-    } while (--n);
-#endif
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateInit_(strm, level, version, stream_size)
-    z_streamp strm;
-    int level;
-    const char *version;
-    int stream_size;
-{
-    return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
-                         Z_DEFAULT_STRATEGY, version, stream_size);
-    /* To do: ignore strm->next_in if we use it as window */
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
-                  version, stream_size)
-    z_streamp strm;
-    int  level;
-    int  method;
-    int  windowBits;
-    int  memLevel;
-    int  strategy;
-    const char *version;
-    int stream_size;
-{
-    deflate_state *s;
-    int wrap = 1;
-    static const char my_version[] = ZLIB_VERSION;
-
-    ushf *overlay;
-    /* We overlay pending_buf and d_buf+l_buf. This works since the average
-     * output size for (length,distance) codes is <= 24 bits.
-     */
-
-    if (version == Z_NULL || version[0] != my_version[0] ||
-        stream_size != sizeof(z_stream)) {
-        return Z_VERSION_ERROR;
-    }
-    if (strm == Z_NULL) return Z_STREAM_ERROR;
-
-    strm->msg = Z_NULL;
-    if (strm->zalloc == (alloc_func)0) {
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-        strm->zalloc = zcalloc;
-        strm->opaque = (voidpf)0;
-#endif
-    }
-    if (strm->zfree == (free_func)0)
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-        strm->zfree = zcfree;
-#endif
-
-#ifdef FASTEST
-    if (level != 0) level = 1;
-#else
-    if (level == Z_DEFAULT_COMPRESSION) level = 6;
-#endif
-
-    if (windowBits < 0) { /* suppress zlib wrapper */
-        wrap = 0;
-        windowBits = -windowBits;
-    }
-#ifdef GZIP
-    else if (windowBits > 15) {
-        wrap = 2;       /* write gzip wrapper instead */
-        windowBits -= 16;
-    }
-#endif
-    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
-        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
-        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
-        return Z_STREAM_ERROR;
-    }
-    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
-    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
-    if (s == Z_NULL) return Z_MEM_ERROR;
-    strm->state = (struct internal_state FAR *)s;
-    s->strm = strm;
-    s->status = INIT_STATE;     /* to pass state test in deflateReset() */
-
-    s->wrap = wrap;
-    s->gzhead = Z_NULL;
-    s->w_bits = (uInt)windowBits;
-    s->w_size = 1 << s->w_bits;
-    s->w_mask = s->w_size - 1;
-
-    s->hash_bits = (uInt)memLevel + 7;
-    s->hash_size = 1 << s->hash_bits;
-    s->hash_mask = s->hash_size - 1;
-    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
-
-    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
-    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
-    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
-
-    s->high_water = 0;      /* nothing written to s->window yet */
-
-    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
-
-    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
-    s->pending_buf = (uchf *) overlay;
-    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
-
-    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
-        s->pending_buf == Z_NULL) {
-        s->status = FINISH_STATE;
-        strm->msg = ERR_MSG(Z_MEM_ERROR);
-        deflateEnd (strm);
-        return Z_MEM_ERROR;
-    }
-    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
-    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
-
-    s->level = level;
-    s->strategy = strategy;
-    s->method = (Byte)method;
-
-    return deflateReset(strm);
-}
-
-/* =========================================================================
- * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
- */
-local int deflateStateCheck (strm)
-    z_streamp strm;
-{
-    deflate_state *s;
-    if (strm == Z_NULL ||
-        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
-        return 1;
-    s = strm->state;
-    if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE &&
-#ifdef GZIP
-                                           s->status != GZIP_STATE &&
-#endif
-                                           s->status != EXTRA_STATE &&
-                                           s->status != NAME_STATE &&
-                                           s->status != COMMENT_STATE &&
-                                           s->status != HCRC_STATE &&
-                                           s->status != BUSY_STATE &&
-                                           s->status != FINISH_STATE))
-        return 1;
-    return 0;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
-    z_streamp strm;
-    const Bytef *dictionary;
-    uInt  dictLength;
-{
-    deflate_state *s;
-    uInt str, n;
-    int wrap;
-    unsigned avail;
-    z_const unsigned char *next;
-
-    if (deflateStateCheck(strm) || dictionary == Z_NULL)
-        return Z_STREAM_ERROR;
-    s = strm->state;
-    wrap = s->wrap;
-    if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
-        return Z_STREAM_ERROR;
-
-    /* when using zlib wrappers, compute Adler-32 for provided dictionary */
-    if (wrap == 1)
-        strm->adler = adler32(strm->adler, dictionary, dictLength);
-    s->wrap = 0;                    /* avoid computing Adler-32 in read_buf */
-
-    /* if dictionary would fill window, just replace the history */
-    if (dictLength >= s->w_size) {
-        if (wrap == 0) {            /* already empty otherwise */
-            CLEAR_HASH(s);
-            s->strstart = 0;
-            s->block_start = 0L;
-            s->insert = 0;
-        }
-        dictionary += dictLength - s->w_size;  /* use the tail */
-        dictLength = s->w_size;
-    }
-
-    /* insert dictionary into window and hash */
-    avail = strm->avail_in;
-    next = strm->next_in;
-    strm->avail_in = dictLength;
-    strm->next_in = (z_const Bytef *)dictionary;
-    fill_window(s);
-    while (s->lookahead >= MIN_MATCH) {
-        str = s->strstart;
-        n = s->lookahead - (MIN_MATCH-1);
-        do {
-            UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
-#ifndef FASTEST
-            s->prev[str & s->w_mask] = s->head[s->ins_h];
-#endif
-            s->head[s->ins_h] = (Pos)str;
-            str++;
-        } while (--n);
-        s->strstart = str;
-        s->lookahead = MIN_MATCH-1;
-        fill_window(s);
-    }
-    s->strstart += s->lookahead;
-    s->block_start = (long)s->strstart;
-    s->insert = s->lookahead;
-    s->lookahead = 0;
-    s->match_length = s->prev_length = MIN_MATCH-1;
-    s->match_available = 0;
-    strm->next_in = next;
-    strm->avail_in = avail;
-    s->wrap = wrap;
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength)
-    z_streamp strm;
-    Bytef *dictionary;
-    uInt  *dictLength;
-{
-    deflate_state *s;
-    uInt len;
-
-    if (deflateStateCheck(strm))
-        return Z_STREAM_ERROR;
-    s = strm->state;
-    len = s->strstart + s->lookahead;
-    if (len > s->w_size)
-        len = s->w_size;
-    if (dictionary != Z_NULL && len)
-        zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len);
-    if (dictLength != Z_NULL)
-        *dictLength = len;
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateResetKeep (strm)
-    z_streamp strm;
-{
-    deflate_state *s;
-
-    if (deflateStateCheck(strm)) {
-        return Z_STREAM_ERROR;
-    }
-
-    strm->total_in = strm->total_out = 0;
-    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
-    strm->data_type = Z_UNKNOWN;
-
-    s = (deflate_state *)strm->state;
-    s->pending = 0;
-    s->pending_out = s->pending_buf;
-
-    if (s->wrap < 0) {
-        s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
-    }
-    s->status =
-#ifdef GZIP
-        s->wrap == 2 ? GZIP_STATE :
-#endif
-        s->wrap ? INIT_STATE : BUSY_STATE;
-    strm->adler =
-#ifdef GZIP
-        s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
-#endif
-        adler32(0L, Z_NULL, 0);
-    s->last_flush = Z_NO_FLUSH;
-
-    _tr_init(s);
-
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateReset (strm)
-    z_streamp strm;
-{
-    int ret;
-
-    ret = deflateResetKeep(strm);
-    if (ret == Z_OK)
-        lm_init(strm->state);
-    return ret;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateSetHeader (strm, head)
-    z_streamp strm;
-    gz_headerp head;
-{
-    if (deflateStateCheck(strm) || strm->state->wrap != 2)
-        return Z_STREAM_ERROR;
-    strm->state->gzhead = head;
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflatePending (strm, pending, bits)
-    unsigned *pending;
-    int *bits;
-    z_streamp strm;
-{
-    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-    if (pending != Z_NULL)
-        *pending = strm->state->pending;
-    if (bits != Z_NULL)
-        *bits = strm->state->bi_valid;
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflatePrime (strm, bits, value)
-    z_streamp strm;
-    int bits;
-    int value;
-{
-    deflate_state *s;
-    int put;
-
-    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-    s = strm->state;
-    if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
-        return Z_BUF_ERROR;
-    do {
-        put = Buf_size - s->bi_valid;
-        if (put > bits)
-            put = bits;
-        s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid);
-        s->bi_valid += put;
-        _tr_flush_bits(s);
-        value >>= put;
-        bits -= put;
-    } while (bits);
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateParams(strm, level, strategy)
-    z_streamp strm;
-    int level;
-    int strategy;
-{
-    deflate_state *s;
-    compress_func func;
-
-    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-    s = strm->state;
-
-#ifdef FASTEST
-    if (level != 0) level = 1;
-#else
-    if (level == Z_DEFAULT_COMPRESSION) level = 6;
-#endif
-    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) {
-        return Z_STREAM_ERROR;
-    }
-    func = configuration_table[s->level].func;
-
-    if ((strategy != s->strategy || func != configuration_table[level].func) &&
-        s->high_water) {
-        /* Flush the last buffer: */
-        int err = deflate(strm, Z_BLOCK);
-        if (err == Z_STREAM_ERROR)
-            return err;
-        if (strm->avail_out == 0)
-            return Z_BUF_ERROR;
-    }
-    if (s->level != level) {
-        if (s->level == 0 && s->matches != 0) {
-            if (s->matches == 1)
-                slide_hash(s);
-            else
-                CLEAR_HASH(s);
-            s->matches = 0;
-        }
-        s->level = level;
-        s->max_lazy_match   = configuration_table[level].max_lazy;
-        s->good_match       = configuration_table[level].good_length;
-        s->nice_match       = configuration_table[level].nice_length;
-        s->max_chain_length = configuration_table[level].max_chain;
-    }
-    s->strategy = strategy;
-    return Z_OK;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
-    z_streamp strm;
-    int good_length;
-    int max_lazy;
-    int nice_length;
-    int max_chain;
-{
-    deflate_state *s;
-
-    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-    s = strm->state;
-    s->good_match = (uInt)good_length;
-    s->max_lazy_match = (uInt)max_lazy;
-    s->nice_match = nice_length;
-    s->max_chain_length = (uInt)max_chain;
-    return Z_OK;
-}
-
-/* =========================================================================
- * For the default windowBits of 15 and memLevel of 8, this function returns
- * a close to exact, as well as small, upper bound on the compressed size.
- * They are coded as constants here for a reason--if the #define's are
- * changed, then this function needs to be changed as well.  The return
- * value for 15 and 8 only works for those exact settings.
- *
- * For any setting other than those defaults for windowBits and memLevel,
- * the value returned is a conservative worst case for the maximum expansion
- * resulting from using fixed blocks instead of stored blocks, which deflate
- * can emit on compressed data for some combinations of the parameters.
- *
- * This function could be more sophisticated to provide closer upper bounds for
- * every combination of windowBits and memLevel.  But even the conservative
- * upper bound of about 14% expansion does not seem onerous for output buffer
- * allocation.
- */
-uLong ZEXPORT deflateBound(strm, sourceLen)
-    z_streamp strm;
-    uLong sourceLen;
-{
-    deflate_state *s;
-    uLong complen, wraplen;
-
-    /* conservative upper bound for compressed data */
-    complen = sourceLen +
-              ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
-
-    /* if can't get parameters, return conservative bound plus zlib wrapper */
-    if (deflateStateCheck(strm))
-        return complen + 6;
-
-    /* compute wrapper length */
-    s = strm->state;
-    switch (s->wrap) {
-    case 0:                                 /* raw deflate */
-        wraplen = 0;
-        break;
-    case 1:                                 /* zlib wrapper */
-        wraplen = 6 + (s->strstart ? 4 : 0);
-        break;
-#ifdef GZIP
-    case 2:                                 /* gzip wrapper */
-        wraplen = 18;
-        if (s->gzhead != Z_NULL) {          /* user-supplied gzip header */
-            Bytef *str;
-            if (s->gzhead->extra != Z_NULL)
-                wraplen += 2 + s->gzhead->extra_len;
-            str = s->gzhead->name;
-            if (str != Z_NULL)
-                do {
-                    wraplen++;
-                } while (*str++);
-            str = s->gzhead->comment;
-            if (str != Z_NULL)
-                do {
-                    wraplen++;
-                } while (*str++);
-            if (s->gzhead->hcrc)
-                wraplen += 2;
-        }
-        break;
-#endif
-    default:                                /* for compiler happiness */
-        wraplen = 6;
-    }
-
-    /* if not default parameters, return conservative bound */
-    if (s->w_bits != 15 || s->hash_bits != 8 + 7)
-        return complen + wraplen;
-
-    /* default settings: return tight bound for that case */
-    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
-           (sourceLen >> 25) + 13 - 6 + wraplen;
-}
-
-/* =========================================================================
- * Put a short in the pending buffer. The 16-bit value is put in MSB order.
- * IN assertion: the stream state is correct and there is enough room in
- * pending_buf.
- */
-local void putShortMSB (s, b)
-    deflate_state *s;
-    uInt b;
-{
-    put_byte(s, (Byte)(b >> 8));
-    put_byte(s, (Byte)(b & 0xff));
-}
-
-/* =========================================================================
- * Flush as much pending output as possible. All deflate() output, except for
- * some deflate_stored() output, goes through this function so some
- * applications may wish to modify it to avoid allocating a large
- * strm->next_out buffer and copying into it. (See also read_buf()).
- */
-local void flush_pending(strm)
-    z_streamp strm;
-{
-    unsigned len;
-    deflate_state *s = strm->state;
-
-    _tr_flush_bits(s);
-    len = s->pending;
-    if (len > strm->avail_out) len = strm->avail_out;
-    if (len == 0) return;
-
-    zmemcpy(strm->next_out, s->pending_out, len);
-    strm->next_out  += len;
-    s->pending_out  += len;
-    strm->total_out += len;
-    strm->avail_out -= len;
-    s->pending      -= len;
-    if (s->pending == 0) {
-        s->pending_out = s->pending_buf;
-    }
-}
-
-/* ===========================================================================
- * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
- */
-#define HCRC_UPDATE(beg) \
-    do { \
-        if (s->gzhead->hcrc && s->pending > (beg)) \
-            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
-                                s->pending - (beg)); \
-    } while (0)
-
-/* ========================================================================= */
-int ZEXPORT deflate (strm, flush)
-    z_streamp strm;
-    int flush;
-{
-    int old_flush; /* value of flush param for previous deflate call */
-    deflate_state *s;
-
-    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
-        return Z_STREAM_ERROR;
-    }
-    s = strm->state;
-
-    if (strm->next_out == Z_NULL ||
-        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
-        (s->status == FINISH_STATE && flush != Z_FINISH)) {
-        ERR_RETURN(strm, Z_STREAM_ERROR);
-    }
-    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
-
-    old_flush = s->last_flush;
-    s->last_flush = flush;
-
-    /* Flush as much pending output as possible */
-    if (s->pending != 0) {
-        flush_pending(strm);
-        if (strm->avail_out == 0) {
-            /* Since avail_out is 0, deflate will be called again with
-             * more output space, but possibly with both pending and
-             * avail_in equal to zero. There won't be anything to do,
-             * but this is not an error situation so make sure we
-             * return OK instead of BUF_ERROR at next call of deflate:
-             */
-            s->last_flush = -1;
-            return Z_OK;
-        }
-
-    /* Make sure there is something to do and avoid duplicate consecutive
-     * flushes. For repeated and useless calls with Z_FINISH, we keep
-     * returning Z_STREAM_END instead of Z_BUF_ERROR.
-     */
-    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
-               flush != Z_FINISH) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
-    }
-
-    /* User must not provide more input after the first FINISH: */
-    if (s->status == FINISH_STATE && strm->avail_in != 0) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
-    }
-
-    /* Write the header */
-    if (s->status == INIT_STATE) {
-        /* zlib header */
-        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
-        uInt level_flags;
-
-        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
-            level_flags = 0;
-        else if (s->level < 6)
-            level_flags = 1;
-        else if (s->level == 6)
-            level_flags = 2;
-        else
-            level_flags = 3;
-        header |= (level_flags << 6);
-        if (s->strstart != 0) header |= PRESET_DICT;
-        header += 31 - (header % 31);
-
-        putShortMSB(s, header);
-
-        /* Save the adler32 of the preset dictionary: */
-        if (s->strstart != 0) {
-            putShortMSB(s, (uInt)(strm->adler >> 16));
-            putShortMSB(s, (uInt)(strm->adler & 0xffff));
-        }
-        strm->adler = adler32(0L, Z_NULL, 0);
-        s->status = BUSY_STATE;
-
-        /* Compression must start with an empty pending buffer */
-        flush_pending(strm);
-        if (s->pending != 0) {
-            s->last_flush = -1;
-            return Z_OK;
-        }
-    }
-#ifdef GZIP
-    if (s->status == GZIP_STATE) {
-        /* gzip header */
-        strm->adler = crc32(0L, Z_NULL, 0);
-        put_byte(s, 31);
-        put_byte(s, 139);
-        put_byte(s, 8);
-        if (s->gzhead == Z_NULL) {
-            put_byte(s, 0);
-            put_byte(s, 0);
-            put_byte(s, 0);
-            put_byte(s, 0);
-            put_byte(s, 0);
-            put_byte(s, s->level == 9 ? 2 :
-                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                      4 : 0));
-            put_byte(s, OS_CODE);
-            s->status = BUSY_STATE;
-
-            /* Compression must start with an empty pending buffer */
-            flush_pending(strm);
-            if (s->pending != 0) {
-                s->last_flush = -1;
-                return Z_OK;
-            }
-        }
-        else {
-            put_byte(s, (s->gzhead->text ? 1 : 0) +
-                     (s->gzhead->hcrc ? 2 : 0) +
-                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
-                     (s->gzhead->name == Z_NULL ? 0 : 8) +
-                     (s->gzhead->comment == Z_NULL ? 0 : 16)
-                     );
-            put_byte(s, (Byte)(s->gzhead->time & 0xff));
-            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
-            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
-            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
-            put_byte(s, s->level == 9 ? 2 :
-                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                      4 : 0));
-            put_byte(s, s->gzhead->os & 0xff);
-            if (s->gzhead->extra != Z_NULL) {
-                put_byte(s, s->gzhead->extra_len & 0xff);
-                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
-            }
-            if (s->gzhead->hcrc)
-                strm->adler = crc32(strm->adler, s->pending_buf,
-                                    s->pending);
-            s->gzindex = 0;
-            s->status = EXTRA_STATE;
-        }
-    }
-    if (s->status == EXTRA_STATE) {
-        if (s->gzhead->extra != Z_NULL) {
-            ulg beg = s->pending;   /* start of bytes to update crc */
-            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
-            while (s->pending + left > s->pending_buf_size) {
-                uInt copy = s->pending_buf_size - s->pending;
-                zmemcpy(s->pending_buf + s->pending,
-                        s->gzhead->extra + s->gzindex, copy);
-                s->pending = s->pending_buf_size;
-                HCRC_UPDATE(beg);
-                s->gzindex += copy;
-                flush_pending(strm);
-                if (s->pending != 0) {
-                    s->last_flush = -1;
-                    return Z_OK;
-                }
-                beg = 0;
-                left -= copy;
-            }
-            zmemcpy(s->pending_buf + s->pending,
-                    s->gzhead->extra + s->gzindex, left);
-            s->pending += left;
-            HCRC_UPDATE(beg);
-            s->gzindex = 0;
-        }
-        s->status = NAME_STATE;
-    }
-    if (s->status == NAME_STATE) {
-        if (s->gzhead->name != Z_NULL) {
-            ulg beg = s->pending;   /* start of bytes to update crc */
-            int val;
-            do {
-                if (s->pending == s->pending_buf_size) {
-                    HCRC_UPDATE(beg);
-                    flush_pending(strm);
-                    if (s->pending != 0) {
-                        s->last_flush = -1;
-                        return Z_OK;
-                    }
-                    beg = 0;
-                }
-                val = s->gzhead->name[s->gzindex++];
-                put_byte(s, val);
-            } while (val != 0);
-            HCRC_UPDATE(beg);
-            s->gzindex = 0;
-        }
-        s->status = COMMENT_STATE;
-    }
-    if (s->status == COMMENT_STATE) {
-        if (s->gzhead->comment != Z_NULL) {
-            ulg beg = s->pending;   /* start of bytes to update crc */
-            int val;
-            do {
-                if (s->pending == s->pending_buf_size) {
-                    HCRC_UPDATE(beg);
-                    flush_pending(strm);
-                    if (s->pending != 0) {
-                        s->last_flush = -1;
-                        return Z_OK;
-                    }
-                    beg = 0;
-                }
-                val = s->gzhead->comment[s->gzindex++];
-                put_byte(s, val);
-            } while (val != 0);
-            HCRC_UPDATE(beg);
-        }
-        s->status = HCRC_STATE;
-    }
-    if (s->status == HCRC_STATE) {
-        if (s->gzhead->hcrc) {
-            if (s->pending + 2 > s->pending_buf_size) {
-                flush_pending(strm);
-                if (s->pending != 0) {
-                    s->last_flush = -1;
-                    return Z_OK;
-                }
-            }
-            put_byte(s, (Byte)(strm->adler & 0xff));
-            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
-            strm->adler = crc32(0L, Z_NULL, 0);
-        }
-        s->status = BUSY_STATE;
-
-        /* Compression must start with an empty pending buffer */
-        flush_pending(strm);
-        if (s->pending != 0) {
-            s->last_flush = -1;
-            return Z_OK;
-        }
-    }
-#endif
-
-    /* Start a new block or continue the current one.
-     */
-    if (strm->avail_in != 0 || s->lookahead != 0 ||
-        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
-        block_state bstate;
-
-        bstate = s->level == 0 ? deflate_stored(s, flush) :
-                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
-                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
-                 (*(configuration_table[s->level].func))(s, flush);
-
-        if (bstate == finish_started || bstate == finish_done) {
-            s->status = FINISH_STATE;
-        }
-        if (bstate == need_more || bstate == finish_started) {
-            if (strm->avail_out == 0) {
-                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
-            }
-            return Z_OK;
-            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
-             * of deflate should use the same flush parameter to make sure
-             * that the flush is complete. So we don't have to output an
-             * empty block here, this will be done at next call. This also
-             * ensures that for a very small output buffer, we emit at most
-             * one empty block.
-             */
-        }
-        if (bstate == block_done) {
-            if (flush == Z_PARTIAL_FLUSH) {
-                _tr_align(s);
-            } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
-                _tr_stored_block(s, (char*)0, 0L, 0);
-                /* For a full flush, this empty block will be recognized
-                 * as a special marker by inflate_sync().
-                 */
-                if (flush == Z_FULL_FLUSH) {
-                    CLEAR_HASH(s);             /* forget history */
-                    if (s->lookahead == 0) {
-                        s->strstart = 0;
-                        s->block_start = 0L;
-                        s->insert = 0;
-                    }
-                }
-            }
-            flush_pending(strm);
-            if (strm->avail_out == 0) {
-              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
-              return Z_OK;
-            }
-        }
-    }
-
-    if (flush != Z_FINISH) return Z_OK;
-    if (s->wrap <= 0) return Z_STREAM_END;
-
-    /* Write the trailer */
-#ifdef GZIP
-    if (s->wrap == 2) {
-        put_byte(s, (Byte)(strm->adler & 0xff));
-        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
-        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
-        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
-        put_byte(s, (Byte)(strm->total_in & 0xff));
-        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
-        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
-        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
-    }
-    else
-#endif
-    {
-        putShortMSB(s, (uInt)(strm->adler >> 16));
-        putShortMSB(s, (uInt)(strm->adler & 0xffff));
-    }
-    flush_pending(strm);
-    /* If avail_out is zero, the application will call deflate again
-     * to flush the rest.
-     */
-    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
-    return s->pending != 0 ? Z_OK : Z_STREAM_END;
-}
-
-/* ========================================================================= */
-int ZEXPORT deflateEnd (strm)
-    z_streamp strm;
-{
-    int status;
-
-    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
-
-    status = strm->state->status;
-
-    /* Deallocate in reverse order of allocations: */
-    TRY_FREE(strm, strm->state->pending_buf);
-    TRY_FREE(strm, strm->state->head);
-    TRY_FREE(strm, strm->state->prev);
-    TRY_FREE(strm, strm->state->window);
-
-    ZFREE(strm, strm->state);
-    strm->state = Z_NULL;
-
-    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
-}
-
-/* =========================================================================
- * Copy the source state to the destination state.
- * To simplify the source, this is not supported for 16-bit MSDOS (which
- * doesn't have enough memory anyway to duplicate compression states).
- */
-int ZEXPORT deflateCopy (dest, source)
-    z_streamp dest;
-    z_streamp source;
-{
-#ifdef MAXSEG_64K
-    return Z_STREAM_ERROR;
-#else
-    deflate_state *ds;
-    deflate_state *ss;
-    ushf *overlay;
-
-
-    if (deflateStateCheck(source) || dest == Z_NULL) {
-        return Z_STREAM_ERROR;
-    }
-
-    ss = source->state;
-
-    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
-
-    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
-    if (ds == Z_NULL) return Z_MEM_ERROR;
-    dest->state = (struct internal_state FAR *) ds;
-    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
-    ds->strm = dest;
-
-    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
-    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
-    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
-    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
-    ds->pending_buf = (uchf *) overlay;
-
-    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
-        ds->pending_buf == Z_NULL) {
-        deflateEnd (dest);
-        return Z_MEM_ERROR;
-    }
-    /* following zmemcpy do not work for 16-bit MSDOS */
-    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
-    zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
-    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
-    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
-    ds->l_desc.dyn_tree = ds->dyn_ltree;
-    ds->d_desc.dyn_tree = ds->dyn_dtree;
-    ds->bl_desc.dyn_tree = ds->bl_tree;
-
-    return Z_OK;
-#endif /* MAXSEG_64K */
-}
-
-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read.  All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-local unsigned read_buf(strm, buf, size)
-    z_streamp strm;
-    Bytef *buf;
-    unsigned size;
-{
-    unsigned len = strm->avail_in;
-
-    if (len > size) len = size;
-    if (len == 0) return 0;
-
-    strm->avail_in  -= len;
-
-    zmemcpy(buf, strm->next_in, len);
-    if (strm->state->wrap == 1) {
-        strm->adler = adler32(strm->adler, buf, len);
-    }
-#ifdef GZIP
-    else if (strm->state->wrap == 2) {
-        strm->adler = crc32(strm->adler, buf, len);
-    }
-#endif
-    strm->next_in  += len;
-    strm->total_in += len;
-
-    return len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-local void lm_init (s)
-    deflate_state *s;
-{
-    s->window_size = (ulg)2L*s->w_size;
-
-    CLEAR_HASH(s);
-
-    /* Set the default configuration parameters:
-     */
-    s->max_lazy_match   = configuration_table[s->level].max_lazy;
-    s->good_match       = configuration_table[s->level].good_length;
-    s->nice_match       = configuration_table[s->level].nice_length;
-    s->max_chain_length = configuration_table[s->level].max_chain;
-
-    s->strstart = 0;
-    s->block_start = 0L;
-    s->lookahead = 0;
-    s->insert = 0;
-    s->match_length = s->prev_length = MIN_MATCH-1;
-    s->match_available = 0;
-    s->ins_h = 0;
-#ifndef FASTEST
-#ifdef ASMV
-    match_init(); /* initialize the asm code */
-#endif
-#endif
-}
-
-#ifndef FASTEST
-/* ===========================================================================
- * Set match_start to the longest match starting at the given string and
- * return its length. Matches shorter or equal to prev_length are discarded,
- * in which case the result is equal to prev_length and match_start is
- * garbage.
- * IN assertions: cur_match is the head of the hash chain for the current
- *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
- * OUT assertion: the match length is not greater than s->lookahead.
- */
-#ifndef ASMV
-/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
- * match.S. The code will be functionally equivalent.
- */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
-    unsigned chain_length = s->max_chain_length;/* max hash chain length */
-    register Bytef *scan = s->window + s->strstart; /* current string */
-    register Bytef *match;                      /* matched string */
-    register int len;                           /* length of current match */
-    int best_len = (int)s->prev_length;         /* best match length so far */
-    int nice_match = s->nice_match;             /* stop if match long enough */
-    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
-        s->strstart - (IPos)MAX_DIST(s) : NIL;
-    /* Stop when cur_match becomes <= limit. To simplify the code,
-     * we prevent matches with the string of window index 0.
-     */
-    Posf *prev = s->prev;
-    uInt wmask = s->w_mask;
-
-#ifdef UNALIGNED_OK
-    /* Compare two bytes at a time. Note: this is not always beneficial.
-     * Try with and without -DUNALIGNED_OK to check.
-     */
-    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
-    register ush scan_start = *(ushf*)scan;
-    register ush scan_end   = *(ushf*)(scan+best_len-1);
-#else
-    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-    register Byte scan_end1  = scan[best_len-1];
-    register Byte scan_end   = scan[best_len];
-#endif
-
-    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
-     * It is easy to get rid of this optimization if necessary.
-     */
-    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-
-    /* Do not waste too much time if we already have a good match: */
-    if (s->prev_length >= s->good_match) {
-        chain_length >>= 2;
-    }
-    /* Do not look for matches beyond the end of the input. This is necessary
-     * to make deflate deterministic.
-     */
-    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
-
-    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-
-    do {
-        Assert(cur_match < s->strstart, "no future");
-        match = s->window + cur_match;
-
-        /* Skip to next match if the match length cannot increase
-         * or if the match length is less than 2.  Note that the checks below
-         * for insufficient lookahead only occur occasionally for performance
-         * reasons.  Therefore uninitialized memory will be accessed, and
-         * conditional jumps will be made that depend on those values.
-         * However the length of the match is limited to the lookahead, so
-         * the output of deflate is not affected by the uninitialized values.
-         */
-#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
-        /* This code assumes sizeof(unsigned short) == 2. Do not use
-         * UNALIGNED_OK if your compiler uses a different size.
-         */
-        if (*(ushf*)(match+best_len-1) != scan_end ||
-            *(ushf*)match != scan_start) continue;
-
-        /* It is not necessary to compare scan[2] and match[2] since they are
-         * always equal when the other bytes match, given that the hash keys
-         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
-         * strstart+3, +5, ... up to strstart+257. We check for insufficient
-         * lookahead only every 4th comparison; the 128th check will be made
-         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
-         * necessary to put more guard bytes at the end of the window, or
-         * to check more often for insufficient lookahead.
-         */
-        Assert(scan[2] == match[2], "scan[2]?");
-        scan++, match++;
-        do {
-        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 scan < strend);
-        /* The funny "do {}" generates better code on most compilers */
-
-        /* Here, scan <= window+strstart+257 */
-        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-        if (*scan == *match) scan++;
-
-        len = (MAX_MATCH - 1) - (int)(strend-scan);
-        scan = strend - (MAX_MATCH-1);
-
-#else /* UNALIGNED_OK */
-
-        if (match[best_len]   != scan_end  ||
-            match[best_len-1] != scan_end1 ||
-            *match            != *scan     ||
-            *++match          != scan[1])      continue;
-
-        /* The check at best_len-1 can be removed because it will be made
-         * again later. (This heuristic is not always a win.)
-         * It is not necessary to compare scan[2] and match[2] since they
-         * are always equal when the other bytes match, given that
-         * the hash keys are equal and that HASH_BITS >= 8.
-         */
-        scan += 2, match++;
-        Assert(*scan == *match, "match[2]?");
-
-        /* We check for insufficient lookahead only every 8th comparison;
-         * the 256th check will be made at strstart+258.
-         */
-        do {
-        } while (*++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 scan < strend);
-
-        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-
-        len = MAX_MATCH - (int)(strend - scan);
-        scan = strend - MAX_MATCH;
-
-#endif /* UNALIGNED_OK */
-
-        if (len > best_len) {
-            s->match_start = cur_match;
-            best_len = len;
-            if (len >= nice_match) break;
-#ifdef UNALIGNED_OK
-            scan_end = *(ushf*)(scan+best_len-1);
-#else
-            scan_end1  = scan[best_len-1];
-            scan_end   = scan[best_len];
-#endif
-        }
-    } while ((cur_match = prev[cur_match & wmask]) > limit
-             && --chain_length != 0);
-
-    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
-    return s->lookahead;
-}
-#endif /* ASMV */
-
-#else /* FASTEST */
-
-/* ---------------------------------------------------------------------------
- * Optimized version for FASTEST only
- */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
-    register Bytef *scan = s->window + s->strstart; /* current string */
-    register Bytef *match;                       /* matched string */
-    register int len;                           /* length of current match */
-    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-
-    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
-     * It is easy to get rid of this optimization if necessary.
-     */
-    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
-
-    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
-
-    Assert(cur_match < s->strstart, "no future");
-
-    match = s->window + cur_match;
-
-    /* Return failure if the match length is less than 2:
-     */
-    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
-
-    /* The check at best_len-1 can be removed because it will be made
-     * again later. (This heuristic is not always a win.)
-     * It is not necessary to compare scan[2] and match[2] since they
-     * are always equal when the other bytes match, given that
-     * the hash keys are equal and that HASH_BITS >= 8.
-     */
-    scan += 2, match += 2;
-    Assert(*scan == *match, "match[2]?");
-
-    /* We check for insufficient lookahead only every 8th comparison;
-     * the 256th check will be made at strstart+258.
-     */
-    do {
-    } while (*++scan == *++match && *++scan == *++match &&
-             *++scan == *++match && *++scan == *++match &&
-             *++scan == *++match && *++scan == *++match &&
-             *++scan == *++match && *++scan == *++match &&
-             scan < strend);
-
-    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
-
-    len = MAX_MATCH - (int)(strend - scan);
-
-    if (len < MIN_MATCH) return MIN_MATCH - 1;
-
-    s->match_start = cur_match;
-    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
-}
-
-#endif /* FASTEST */
-
-#ifdef ZLIB_DEBUG
-
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-/* ===========================================================================
- * Check that the match at match_start is indeed a match.
- */
-local void check_match(s, start, match, length)
-    deflate_state *s;
-    IPos start, match;
-    int length;
-{
-    /* check that the match is indeed a match */
-    if (zmemcmp(s->window + match,
-                s->window + start, length) != EQUAL) {
-        fprintf(stderr, " start %u, match %u, length %d\n",
-                start, match, length);
-        do {
-            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
-        } while (--length != 0);
-        z_error("invalid match");
-    }
-    if (z_verbose > 1) {
-        fprintf(stderr,"\\[%d,%d]", start-match, length);
-        do { putc(s->window[start++], stderr); } while (--length != 0);
-    }
-}
-#else
-#  define check_match(s, start, match, length)
-#endif /* ZLIB_DEBUG */
-
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- *    At least one byte has been read, or avail_in == 0; reads are
- *    performed for at least two bytes (required for the zip translate_eol
- *    option -- not supported here).
- */
-local void fill_window(s)
-    deflate_state *s;
-{
-    unsigned n;
-    unsigned more;    /* Amount of free space at the end of the window. */
-    uInt wsize = s->w_size;
-
-    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
-    do {
-        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
-        /* Deal with !@#$% 64K limit: */
-        if (sizeof(int) <= 2) {
-            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-                more = wsize;
-
-            } else if (more == (unsigned)(-1)) {
-                /* Very unlikely, but possible on 16 bit machine if
-                 * strstart == 0 && lookahead == 1 (input done a byte at time)
-                 */
-                more--;
-            }
-        }
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize+MAX_DIST(s)) {
-
-            zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more);
-            s->match_start -= wsize;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= (long) wsize;
-            slide_hash(s);
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0) break;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead + s->insert >= MIN_MATCH) {
-            uInt str = s->strstart - s->insert;
-            s->ins_h = s->window[str];
-            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
-#if MIN_MATCH != 3
-            Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
-            while (s->insert) {
-                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
-#ifndef FASTEST
-                s->prev[str & s->w_mask] = s->head[s->ins_h];
-#endif
-                s->head[s->ins_h] = (Pos)str;
-                str++;
-                s->insert--;
-                if (s->lookahead + s->insert < MIN_MATCH)
-                    break;
-            }
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
-    /* If the WIN_INIT bytes after the end of the current data have never been
-     * written, then zero those bytes in order to avoid memory check reports of
-     * the use of uninitialized (or uninitialised as Julian writes) bytes by
-     * the longest match routines.  Update the high water mark for the next
-     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
-     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
-     */
-    if (s->high_water < s->window_size) {
-        ulg curr = s->strstart + (ulg)(s->lookahead);
-        ulg init;
-
-        if (s->high_water < curr) {
-            /* Previous high water mark below current data -- zero WIN_INIT
-             * bytes or up to end of window, whichever is less.
-             */
-            init = s->window_size - curr;
-            if (init > WIN_INIT)
-                init = WIN_INIT;
-            zmemzero(s->window + curr, (unsigned)init);
-            s->high_water = curr + init;
-        }
-        else if (s->high_water < (ulg)curr + WIN_INIT) {
-            /* High water mark at or above current data, but below current data
-             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
-             * to end of window, whichever is less.
-             */
-            init = (ulg)curr + WIN_INIT - s->high_water;
-            if (init > s->window_size - s->high_water)
-                init = s->window_size - s->high_water;
-            zmemzero(s->window + s->high_water, (unsigned)init);
-            s->high_water += init;
-        }
-    }
-
-    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
-           "not enough room for search");
-}
-
-/* ===========================================================================
- * Flush the current block, with given end-of-file flag.
- * IN assertion: strstart is set to the end of the current match.
- */
-#define FLUSH_BLOCK_ONLY(s, last) { \
-   _tr_flush_block(s, (s->block_start >= 0L ? \
-                   (charf *)&s->window[(unsigned)s->block_start] : \
-                   (charf *)Z_NULL), \
-                (ulg)((long)s->strstart - s->block_start), \
-                (last)); \
-   s->block_start = s->strstart; \
-   flush_pending(s->strm); \
-   Tracev((stderr,"[FLUSH]")); \
-}
-
-/* Same but force premature exit if necessary. */
-#define FLUSH_BLOCK(s, last) { \
-   FLUSH_BLOCK_ONLY(s, last); \
-   if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
-}
-
-/* Maximum stored block length in deflate format (not including header). */
-#define MAX_STORED 65535
-
-/* Minimum of a and b. */
-#define MIN(a, b) ((a) > (b) ? (b) : (a))
-
-/* ===========================================================================
- * Copy without compression as much as possible from the input stream, return
- * the current block state.
- *
- * In case deflateParams() is used to later switch to a non-zero compression
- * level, s->matches (otherwise unused when storing) keeps track of the number
- * of hash table slides to perform. If s->matches is 1, then one hash table
- * slide will be done when switching. If s->matches is 2, the maximum value
- * allowed here, then the hash table will be cleared, since two or more slides
- * is the same as a clear.
- *
- * deflate_stored() is written to minimize the number of times an input byte is
- * copied. It is most efficient with large input and output buffers, which
- * maximizes the opportunites to have a single copy from next_in to next_out.
- */
-local block_state deflate_stored(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    /* Smallest worthy block size when not flushing or finishing. By default
-     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
-     * large input and output buffers, the stored block size will be larger.
-     */
-    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
-
-    /* Copy as many min_block or larger stored blocks directly to next_out as
-     * possible. If flushing, copy the remaining available input to next_out as
-     * stored blocks, if there is enough space.
-     */
-    unsigned len, left, have, last = 0;
-    unsigned used = s->strm->avail_in;
-    do {
-        /* Set len to the maximum size block that we can copy directly with the
-         * available input data and output space. Set left to how much of that
-         * would be copied from what's left in the window.
-         */
-        len = MAX_STORED;       /* maximum deflate stored block length */
-        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
-        if (s->strm->avail_out < have)          /* need room for header */
-            break;
-            /* maximum stored block length that will fit in avail_out: */
-        have = s->strm->avail_out - have;
-        left = s->strstart - s->block_start;    /* bytes left in window */
-        if (len > (ulg)left + s->strm->avail_in)
-            len = left + s->strm->avail_in;     /* limit len to the input */
-        if (len > have)
-            len = have;                         /* limit len to the output */
-
-        /* If the stored block would be less than min_block in length, or if
-         * unable to copy all of the available input when flushing, then try
-         * copying to the window and the pending buffer instead. Also don't
-         * write an empty block when flushing -- deflate() does that.
-         */
-        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
-                                flush == Z_NO_FLUSH ||
-                                len != left + s->strm->avail_in))
-            break;
-
-        /* Make a dummy stored block in pending to get the header bytes,
-         * including any pending bits. This also updates the debugging counts.
-         */
-        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
-        _tr_stored_block(s, (char *)0, 0L, last);
-
-        /* Replace the lengths in the dummy stored block with len. */
-        s->pending_buf[s->pending - 4] = len;
-        s->pending_buf[s->pending - 3] = len >> 8;
-        s->pending_buf[s->pending - 2] = ~len;
-        s->pending_buf[s->pending - 1] = ~len >> 8;
-
-        /* Write the stored block header bytes. */
-        flush_pending(s->strm);
-
-#ifdef ZLIB_DEBUG
-        /* Update debugging counts for the data about to be copied. */
-        s->compressed_len += len << 3;
-        s->bits_sent += len << 3;
-#endif
-
-        /* Copy uncompressed bytes from the window to next_out. */
-        if (left) {
-            if (left > len)
-                left = len;
-            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
-            s->strm->next_out += left;
-            s->strm->avail_out -= left;
-            s->strm->total_out += left;
-            s->block_start += left;
-            len -= left;
-        }
-
-        /* Copy uncompressed bytes directly from next_in to next_out, updating
-         * the check value.
-         */
-        if (len) {
-            read_buf(s->strm, s->strm->next_out, len);
-            s->strm->next_out += len;
-            s->strm->avail_out -= len;
-            s->strm->total_out += len;
-        }
-    } while (last == 0);
-
-    /* Update the sliding window with the last s->w_size bytes of the copied
-     * data, or append all of the copied data to the existing window if less
-     * than s->w_size bytes were copied. Also update the number of bytes to
-     * insert in the hash tables, in the event that deflateParams() switches to
-     * a non-zero compression level.
-     */
-    used -= s->strm->avail_in;      /* number of input bytes directly copied */
-    if (used) {
-        /* If any input was used, then no unused input remains in the window,
-         * therefore s->block_start == s->strstart.
-         */
-        if (used >= s->w_size) {    /* supplant the previous history */
-            s->matches = 2;         /* clear hash */
-            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
-            s->strstart = s->w_size;
-        }
-        else {
-            if (s->window_size - s->strstart <= used) {
-                /* Slide the window down. */
-                s->strstart -= s->w_size;
-                zmemcpy(s->window, s->window + s->w_size, s->strstart);
-                if (s->matches < 2)
-                    s->matches++;   /* add a pending slide_hash() */
-            }
-            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
-            s->strstart += used;
-        }
-        s->block_start = s->strstart;
-        s->insert += MIN(used, s->w_size - s->insert);
-    }
-    if (s->high_water < s->strstart)
-        s->high_water = s->strstart;
-
-    /* If the last block was written to next_out, then done. */
-    if (last)
-        return finish_done;
-
-    /* If flushing and all input has been consumed, then done. */
-    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
-        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
-        return block_done;
-
-    /* Fill the window with any remaining input. */
-    have = s->window_size - s->strstart - 1;
-    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
-        /* Slide the window down. */
-        s->block_start -= s->w_size;
-        s->strstart -= s->w_size;
-        zmemcpy(s->window, s->window + s->w_size, s->strstart);
-        if (s->matches < 2)
-            s->matches++;           /* add a pending slide_hash() */
-        have += s->w_size;          /* more space now */
-    }
-    if (have > s->strm->avail_in)
-        have = s->strm->avail_in;
-    if (have) {
-        read_buf(s->strm, s->window + s->strstart, have);
-        s->strstart += have;
-    }
-    if (s->high_water < s->strstart)
-        s->high_water = s->strstart;
-
-    /* There was not enough avail_out to write a complete worthy or flushed
-     * stored block to next_out. Write a stored block to pending instead, if we
-     * have enough input for a worthy block, or if flushing and there is enough
-     * room for the remaining input as a stored block in the pending buffer.
-     */
-    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
-        /* maximum stored block length that will fit in pending: */
-    have = MIN(s->pending_buf_size - have, MAX_STORED);
-    min_block = MIN(have, s->w_size);
-    left = s->strstart - s->block_start;
-    if (left >= min_block ||
-        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
-         s->strm->avail_in == 0 && left <= have)) {
-        len = MIN(left, have);
-        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
-               len == left ? 1 : 0;
-        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
-        s->block_start += len;
-        flush_pending(s->strm);
-    }
-
-    /* We've done all we can with the available input and output. */
-    return last ? finish_started : need_more;
-}
-
-/* ===========================================================================
- * Compress as much as possible from the input stream, return the current
- * block state.
- * This function does not perform lazy evaluation of matches and inserts
- * new strings in the dictionary only for unmatched strings or for short
- * matches. It is used only for the fast compression options.
- */
-local block_state deflate_fast(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    IPos hash_head;       /* head of the hash chain */
-    int bflush;           /* set if current block must be flushed */
-
-    for (;;) {
-        /* Make sure that we always have enough lookahead, except
-         * at the end of the input file. We need MAX_MATCH bytes
-         * for the next match, plus MIN_MATCH bytes to insert the
-         * string following the next match.
-         */
-        if (s->lookahead < MIN_LOOKAHEAD) {
-            fill_window(s);
-            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
-                return need_more;
-            }
-            if (s->lookahead == 0) break; /* flush the current block */
-        }
-
-        /* Insert the string window[strstart .. strstart+2] in the
-         * dictionary, and set hash_head to the head of the hash chain:
-         */
-        hash_head = NIL;
-        if (s->lookahead >= MIN_MATCH) {
-            INSERT_STRING(s, s->strstart, hash_head);
-        }
-
-        /* Find the longest match, discarding those <= prev_length.
-         * At this point we have always match_length < MIN_MATCH
-         */
-        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
-            /* To simplify the code, we prevent matches with the string
-             * of window index 0 (in particular we have to avoid a match
-             * of the string with itself at the start of the input file).
-             */
-            s->match_length = longest_match (s, hash_head);
-            /* longest_match() sets match_start */
-        }
-        if (s->match_length >= MIN_MATCH) {
-            check_match(s, s->strstart, s->match_start, s->match_length);
-
-            _tr_tally_dist(s, s->strstart - s->match_start,
-                           s->match_length - MIN_MATCH, bflush);
-
-            s->lookahead -= s->match_length;
-
-            /* Insert new strings in the hash table only if the match length
-             * is not too large. This saves time but degrades compression.
-             */
-#ifndef FASTEST
-            if (s->match_length <= s->max_insert_length &&
-                s->lookahead >= MIN_MATCH) {
-                s->match_length--; /* string at strstart already in table */
-                do {
-                    s->strstart++;
-                    INSERT_STRING(s, s->strstart, hash_head);
-                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
-                     * always MIN_MATCH bytes ahead.
-                     */
-                } while (--s->match_length != 0);
-                s->strstart++;
-            } else
-#endif
-            {
-                s->strstart += s->match_length;
-                s->match_length = 0;
-                s->ins_h = s->window[s->strstart];
-                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
-                Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
-                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
-                 * matter since it will be recomputed at next deflate call.
-                 */
-            }
-        } else {
-            /* No match, output a literal byte */
-            Tracevv((stderr,"%c", s->window[s->strstart]));
-            _tr_tally_lit (s, s->window[s->strstart], bflush);
-            s->lookahead--;
-            s->strstart++;
-        }
-        if (bflush) FLUSH_BLOCK(s, 0);
-    }
-    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
-    if (flush == Z_FINISH) {
-        FLUSH_BLOCK(s, 1);
-        return finish_done;
-    }
-    if (s->last_lit)
-        FLUSH_BLOCK(s, 0);
-    return block_done;
-}
-
-#ifndef FASTEST
-/* ===========================================================================
- * Same as above, but achieves better compression. We use a lazy
- * evaluation for matches: a match is finally adopted only if there is
- * no better match at the next window position.
- */
-local block_state deflate_slow(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    IPos hash_head;          /* head of hash chain */
-    int bflush;              /* set if current block must be flushed */
-
-    /* Process the input block. */
-    for (;;) {
-        /* Make sure that we always have enough lookahead, except
-         * at the end of the input file. We need MAX_MATCH bytes
-         * for the next match, plus MIN_MATCH bytes to insert the
-         * string following the next match.
-         */
-        if (s->lookahead < MIN_LOOKAHEAD) {
-            fill_window(s);
-            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
-                return need_more;
-            }
-            if (s->lookahead == 0) break; /* flush the current block */
-        }
-
-        /* Insert the string window[strstart .. strstart+2] in the
-         * dictionary, and set hash_head to the head of the hash chain:
-         */
-        hash_head = NIL;
-        if (s->lookahead >= MIN_MATCH) {
-            INSERT_STRING(s, s->strstart, hash_head);
-        }
-
-        /* Find the longest match, discarding those <= prev_length.
-         */
-        s->prev_length = s->match_length, s->prev_match = s->match_start;
-        s->match_length = MIN_MATCH-1;
-
-        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
-            s->strstart - hash_head <= MAX_DIST(s)) {
-            /* To simplify the code, we prevent matches with the string
-             * of window index 0 (in particular we have to avoid a match
-             * of the string with itself at the start of the input file).
-             */
-            s->match_length = longest_match (s, hash_head);
-            /* longest_match() sets match_start */
-
-            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
-#if TOO_FAR <= 32767
-                || (s->match_length == MIN_MATCH &&
-                    s->strstart - s->match_start > TOO_FAR)
-#endif
-                )) {
-
-                /* If prev_match is also MIN_MATCH, match_start is garbage
-                 * but we will ignore the current match anyway.
-                 */
-                s->match_length = MIN_MATCH-1;
-            }
-        }
-        /* If there was a match at the previous step and the current
-         * match is not better, output the previous match:
-         */
-        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
-            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
-            /* Do not insert strings in hash table beyond this. */
-
-            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
-
-            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
-                           s->prev_length - MIN_MATCH, bflush);
-
-            /* Insert in hash table all strings up to the end of the match.
-             * strstart-1 and strstart are already inserted. If there is not
-             * enough lookahead, the last two strings are not inserted in
-             * the hash table.
-             */
-            s->lookahead -= s->prev_length-1;
-            s->prev_length -= 2;
-            do {
-                if (++s->strstart <= max_insert) {
-                    INSERT_STRING(s, s->strstart, hash_head);
-                }
-            } while (--s->prev_length != 0);
-            s->match_available = 0;
-            s->match_length = MIN_MATCH-1;
-            s->strstart++;
-
-            if (bflush) FLUSH_BLOCK(s, 0);
-
-        } else if (s->match_available) {
-            /* If there was no match at the previous position, output a
-             * single literal. If there was a match but the current match
-             * is longer, truncate the previous match to a single literal.
-             */
-            Tracevv((stderr,"%c", s->window[s->strstart-1]));
-            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
-            if (bflush) {
-                FLUSH_BLOCK_ONLY(s, 0);
-            }
-            s->strstart++;
-            s->lookahead--;
-            if (s->strm->avail_out == 0) return need_more;
-        } else {
-            /* There is no previous match to compare with, wait for
-             * the next step to decide.
-             */
-            s->match_available = 1;
-            s->strstart++;
-            s->lookahead--;
-        }
-    }
-    Assert (flush != Z_NO_FLUSH, "no flush?");
-    if (s->match_available) {
-        Tracevv((stderr,"%c", s->window[s->strstart-1]));
-        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
-        s->match_available = 0;
-    }
-    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
-    if (flush == Z_FINISH) {
-        FLUSH_BLOCK(s, 1);
-        return finish_done;
-    }
-    if (s->last_lit)
-        FLUSH_BLOCK(s, 0);
-    return block_done;
-}
-#endif /* FASTEST */
-
-/* ===========================================================================
- * For Z_RLE, simply look for runs of bytes, generate matches only of distance
- * one.  Do not maintain a hash table.  (It will be regenerated if this run of
- * deflate switches away from Z_RLE.)
- */
-local block_state deflate_rle(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    int bflush;             /* set if current block must be flushed */
-    uInt prev;              /* byte at distance one to match */
-    Bytef *scan, *strend;   /* scan goes up to strend for length of run */
-
-    for (;;) {
-        /* Make sure that we always have enough lookahead, except
-         * at the end of the input file. We need MAX_MATCH bytes
-         * for the longest run, plus one for the unrolled loop.
-         */
-        if (s->lookahead <= MAX_MATCH) {
-            fill_window(s);
-            if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) {
-                return need_more;
-            }
-            if (s->lookahead == 0) break; /* flush the current block */
-        }
-
-        /* See how many times the previous byte repeats */
-        s->match_length = 0;
-        if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
-            scan = s->window + s->strstart - 1;
-            prev = *scan;
-            if (prev == *++scan && prev == *++scan && prev == *++scan) {
-                strend = s->window + s->strstart + MAX_MATCH;
-                do {
-                } while (prev == *++scan && prev == *++scan &&
-                         prev == *++scan && prev == *++scan &&
-                         prev == *++scan && prev == *++scan &&
-                         prev == *++scan && prev == *++scan &&
-                         scan < strend);
-                s->match_length = MAX_MATCH - (uInt)(strend - scan);
-                if (s->match_length > s->lookahead)
-                    s->match_length = s->lookahead;
-            }
-            Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan");
-        }
-
-        /* Emit match if have run of MIN_MATCH or longer, else emit literal */
-        if (s->match_length >= MIN_MATCH) {
-            check_match(s, s->strstart, s->strstart - 1, s->match_length);
-
-            _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
-
-            s->lookahead -= s->match_length;
-            s->strstart += s->match_length;
-            s->match_length = 0;
-        } else {
-            /* No match, output a literal byte */
-            Tracevv((stderr,"%c", s->window[s->strstart]));
-            _tr_tally_lit (s, s->window[s->strstart], bflush);
-            s->lookahead--;
-            s->strstart++;
-        }
-        if (bflush) FLUSH_BLOCK(s, 0);
-    }
-    s->insert = 0;
-    if (flush == Z_FINISH) {
-        FLUSH_BLOCK(s, 1);
-        return finish_done;
-    }
-    if (s->last_lit)
-        FLUSH_BLOCK(s, 0);
-    return block_done;
-}
-
-/* ===========================================================================
- * For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
- * (It will be regenerated if this run of deflate switches away from Huffman.)
- */
-local block_state deflate_huff(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    int bflush;             /* set if current block must be flushed */
-
-    for (;;) {
-        /* Make sure that we have a literal to write. */
-        if (s->lookahead == 0) {
-            fill_window(s);
-            if (s->lookahead == 0) {
-                if (flush == Z_NO_FLUSH)
-                    return need_more;
-                break;      /* flush the current block */
-            }
-        }
-
-        /* Output a literal byte */
-        s->match_length = 0;
-        Tracevv((stderr,"%c", s->window[s->strstart]));
-        _tr_tally_lit (s, s->window[s->strstart], bflush);
-        s->lookahead--;
-        s->strstart++;
-        if (bflush) FLUSH_BLOCK(s, 0);
-    }
-    s->insert = 0;
-    if (flush == Z_FINISH) {
-        FLUSH_BLOCK(s, 1);
-        return finish_done;
-    }
-    if (s->last_lit)
-        FLUSH_BLOCK(s, 0);
-    return block_done;
-}
diff --git a/dep/zlib/src/deflate.h b/dep/zlib/src/deflate.h
deleted file mode 100644
index 23ecdd312..000000000
--- a/dep/zlib/src/deflate.h
+++ /dev/null
@@ -1,349 +0,0 @@
-/* deflate.h -- internal compression state
- * Copyright (C) 1995-2016 Jean-loup Gailly
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-/* @(#) $Id$ */
-
-#ifndef DEFLATE_H
-#define DEFLATE_H
-
-#include "zutil.h"
-
-/* define NO_GZIP when compiling if you want to disable gzip header and
-   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
-   the crc code when it is not needed.  For shared libraries, gzip encoding
-   should be left enabled. */
-#ifndef NO_GZIP
-#  define GZIP
-#endif
-
-/* ===========================================================================
- * Internal compression state.
- */
-
-#define LENGTH_CODES 29
-/* number of length codes, not counting the special END_BLOCK code */
-
-#define LITERALS  256
-/* number of literal bytes 0..255 */
-
-#define L_CODES (LITERALS+1+LENGTH_CODES)
-/* number of Literal or Length codes, including the END_BLOCK code */
-
-#define D_CODES   30
-/* number of distance codes */
-
-#define BL_CODES  19
-/* number of codes used to transfer the bit lengths */
-
-#define HEAP_SIZE (2*L_CODES+1)
-/* maximum heap size */
-
-#define MAX_BITS 15
-/* All codes must not exceed MAX_BITS bits */
-
-#define Buf_size 16
-/* size of bit buffer in bi_buf */
-
-#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
-#ifdef GZIP
-#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
-#endif
-#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
-#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
-#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
-#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
-#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
-#define FINISH_STATE 666    /* stream complete */
-/* Stream status */
-
-
-/* Data structure describing a single value and its code string. */
-typedef struct ct_data_s {
-    union {
-        ush  freq;       /* frequency count */
-        ush  code;       /* bit string */
-    } fc;
-    union {
-        ush  dad;        /* father node in Huffman tree */
-        ush  len;        /* length of bit string */
-    } dl;
-} FAR ct_data;
-
-#define Freq fc.freq
-#define Code fc.code
-#define Dad  dl.dad
-#define Len  dl.len
-
-typedef struct static_tree_desc_s  static_tree_desc;
-
-typedef struct tree_desc_s {
-    ct_data *dyn_tree;           /* the dynamic tree */
-    int     max_code;            /* largest code with non zero frequency */
-    const static_tree_desc *stat_desc;  /* the corresponding static tree */
-} FAR tree_desc;
-
-typedef ush Pos;
-typedef Pos FAR Posf;
-typedef unsigned IPos;
-
-/* A Pos is an index in the character window. We use short instead of int to
- * save space in the various tables. IPos is used only for parameter passing.
- */
-
-typedef struct internal_state {
-    z_streamp strm;      /* pointer back to this zlib stream */
-    int   status;        /* as the name implies */
-    Bytef *pending_buf;  /* output still pending */
-    ulg   pending_buf_size; /* size of pending_buf */
-    Bytef *pending_out;  /* next pending byte to output to the stream */
-    ulg   pending;       /* nb of bytes in the pending buffer */
-    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
-    gz_headerp  gzhead;  /* gzip header information to write */
-    ulg   gzindex;       /* where in extra, name, or comment */
-    Byte  method;        /* can only be DEFLATED */
-    int   last_flush;    /* value of flush param for previous deflate call */
-
-                /* used by deflate.c: */
-
-    uInt  w_size;        /* LZ77 window size (32K by default) */
-    uInt  w_bits;        /* log2(w_size)  (8..16) */
-    uInt  w_mask;        /* w_size - 1 */
-
-    Bytef *window;
-    /* Sliding window. Input bytes are read into the second half of the window,
-     * and move to the first half later to keep a dictionary of at least wSize
-     * bytes. With this organization, matches are limited to a distance of
-     * wSize-MAX_MATCH bytes, but this ensures that IO is always
-     * performed with a length multiple of the block size. Also, it limits
-     * the window size to 64K, which is quite useful on MSDOS.
-     * To do: use the user input buffer as sliding window.
-     */
-
-    ulg window_size;
-    /* Actual size of window: 2*wSize, except when the user input buffer
-     * is directly used as sliding window.
-     */
-
-    Posf *prev;
-    /* Link to older string with same hash index. To limit the size of this
-     * array to 64K, this link is maintained only for the last 32K strings.
-     * An index in this array is thus a window index modulo 32K.
-     */
-
-    Posf *head; /* Heads of the hash chains or NIL. */
-
-    uInt  ins_h;          /* hash index of string to be inserted */
-    uInt  hash_size;      /* number of elements in hash table */
-    uInt  hash_bits;      /* log2(hash_size) */
-    uInt  hash_mask;      /* hash_size-1 */
-
-    uInt  hash_shift;
-    /* Number of bits by which ins_h must be shifted at each input
-     * step. It must be such that after MIN_MATCH steps, the oldest
-     * byte no longer takes part in the hash key, that is:
-     *   hash_shift * MIN_MATCH >= hash_bits
-     */
-
-    long block_start;
-    /* Window position at the beginning of the current output block. Gets
-     * negative when the window is moved backwards.
-     */
-
-    uInt match_length;           /* length of best match */
-    IPos prev_match;             /* previous match */
-    int match_available;         /* set if previous match exists */
-    uInt strstart;               /* start of string to insert */
-    uInt match_start;            /* start of matching string */
-    uInt lookahead;              /* number of valid bytes ahead in window */
-
-    uInt prev_length;
-    /* Length of the best match at previous step. Matches not greater than this
-     * are discarded. This is used in the lazy match evaluation.
-     */
-
-    uInt max_chain_length;
-    /* To speed up deflation, hash chains are never searched beyond this
-     * length.  A higher limit improves compression ratio but degrades the
-     * speed.
-     */
-
-    uInt max_lazy_match;
-    /* Attempt to find a better match only when the current match is strictly
-     * smaller than this value. This mechanism is used only for compression
-     * levels >= 4.
-     */
-#   define max_insert_length  max_lazy_match
-    /* Insert new strings in the hash table only if the match length is not
-     * greater than this length. This saves time but degrades compression.
-     * max_insert_length is used only for compression levels <= 3.
-     */
-
-    int level;    /* compression level (1..9) */
-    int strategy; /* favor or force Huffman coding*/
-
-    uInt good_match;
-    /* Use a faster search when the previous match is longer than this */
-
-    int nice_match; /* Stop searching when current match exceeds this */
-
-                /* used by trees.c: */
-    /* Didn't use ct_data typedef below to suppress compiler warning */
-    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
-    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
-    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
-
-    struct tree_desc_s l_desc;               /* desc. for literal tree */
-    struct tree_desc_s d_desc;               /* desc. for distance tree */
-    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
-
-    ush bl_count[MAX_BITS+1];
-    /* number of codes at each bit length for an optimal tree */
-
-    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
-    int heap_len;               /* number of elements in the heap */
-    int heap_max;               /* element of largest frequency */
-    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
-     * The same heap array is used to build all trees.
-     */
-
-    uch depth[2*L_CODES+1];
-    /* Depth of each subtree used as tie breaker for trees of equal frequency
-     */
-
-    uchf *l_buf;          /* buffer for literals or lengths */
-
-    uInt  lit_bufsize;
-    /* Size of match buffer for literals/lengths.  There are 4 reasons for
-     * limiting lit_bufsize to 64K:
-     *   - frequencies can be kept in 16 bit counters
-     *   - if compression is not successful for the first block, all input
-     *     data is still in the window so we can still emit a stored block even
-     *     when input comes from standard input.  (This can also be done for
-     *     all blocks if lit_bufsize is not greater than 32K.)
-     *   - if compression is not successful for a file smaller than 64K, we can
-     *     even emit a stored file instead of a stored block (saving 5 bytes).
-     *     This is applicable only for zip (not gzip or zlib).
-     *   - creating new Huffman trees less frequently may not provide fast
-     *     adaptation to changes in the input data statistics. (Take for
-     *     example a binary file with poorly compressible code followed by
-     *     a highly compressible string table.) Smaller buffer sizes give
-     *     fast adaptation but have of course the overhead of transmitting
-     *     trees more frequently.
-     *   - I can't count above 4
-     */
-
-    uInt last_lit;      /* running index in l_buf */
-
-    ushf *d_buf;
-    /* Buffer for distances. To simplify the code, d_buf and l_buf have
-     * the same number of elements. To use different lengths, an extra flag
-     * array would be necessary.
-     */
-
-    ulg opt_len;        /* bit length of current block with optimal trees */
-    ulg static_len;     /* bit length of current block with static trees */
-    uInt matches;       /* number of string matches in current block */
-    uInt insert;        /* bytes at end of window left to insert */
-
-#ifdef ZLIB_DEBUG
-    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
-    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
-#endif
-
-    ush bi_buf;
-    /* Output buffer. bits are inserted starting at the bottom (least
-     * significant bits).
-     */
-    int bi_valid;
-    /* Number of valid bits in bi_buf.  All bits above the last valid bit
-     * are always zero.
-     */
-
-    ulg high_water;
-    /* High water mark offset in window for initialized bytes -- bytes above
-     * this are set to zero in order to avoid memory check warnings when
-     * longest match routines access bytes past the input.  This is then
-     * updated to the new high water mark.
-     */
-
-} FAR deflate_state;
-
-/* Output a byte on the stream.
- * IN assertion: there is enough room in pending_buf.
- */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
-
-
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
-#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
-/* In order to simplify the code, particularly on 16 bit machines, match
- * distances are limited to MAX_DIST instead of WSIZE.
- */
-
-#define WIN_INIT MAX_MATCH
-/* Number of bytes after end of data in window to initialize in order to avoid
-   memory checker errors from longest match routines */
-
-        /* in trees.c */
-void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
-int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
-void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
-void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
-
-#define d_code(dist) \
-   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
-/* Mapping from a distance to a distance code. dist is the distance - 1 and
- * must not have side effects. _dist_code[256] and _dist_code[257] are never
- * used.
- */
-
-#ifndef ZLIB_DEBUG
-/* Inline versions of _tr_tally for speed: */
-
-#if defined(GEN_TREES_H) || !defined(STDC)
-  extern uch ZLIB_INTERNAL _length_code[];
-  extern uch ZLIB_INTERNAL _dist_code[];
-#else
-  extern const uch ZLIB_INTERNAL _length_code[];
-  extern const uch ZLIB_INTERNAL _dist_code[];
-#endif
-
-# define _tr_tally_lit(s, c, flush) \
-  { uch cc = (c); \
-    s->d_buf[s->last_lit] = 0; \
-    s->l_buf[s->last_lit++] = cc; \
-    s->dyn_ltree[cc].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
-   }
-# define _tr_tally_dist(s, distance, length, flush) \
-  { uch len = (uch)(length); \
-    ush dist = (ush)(distance); \
-    s->d_buf[s->last_lit] = dist; \
-    s->l_buf[s->last_lit++] = len; \
-    dist--; \
-    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
-    s->dyn_dtree[d_code(dist)].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
-  }
-#else
-# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
-# define _tr_tally_dist(s, distance, length, flush) \
-              flush = _tr_tally(s, distance, length)
-#endif
-
-#endif /* DEFLATE_H */
diff --git a/dep/zlib/src/gzclose.c b/dep/zlib/src/gzclose.c
deleted file mode 100644
index caeb99a31..000000000
--- a/dep/zlib/src/gzclose.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* gzclose.c -- zlib gzclose() function
- * Copyright (C) 2004, 2010 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "gzguts.h"
-
-/* gzclose() is in a separate file so that it is linked in only if it is used.
-   That way the other gzclose functions can be used instead to avoid linking in
-   unneeded compression or decompression routines. */
-int ZEXPORT gzclose(file)
-    gzFile file;
-{
-#ifndef NO_GZCOMPRESS
-    gz_statep state;
-
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-
-    return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
-#else
-    return gzclose_r(file);
-#endif
-}
diff --git a/dep/zlib/src/gzguts.h b/dep/zlib/src/gzguts.h
deleted file mode 100644
index 1b56c48a5..000000000
--- a/dep/zlib/src/gzguts.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/* gzguts.h -- zlib internal header definitions for gz* operations
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#ifdef _LARGEFILE64_SOURCE
-#  ifndef _LARGEFILE_SOURCE
-#    define _LARGEFILE_SOURCE 1
-#  endif
-#  ifdef _FILE_OFFSET_BITS
-#    undef _FILE_OFFSET_BITS
-#  endif
-#endif
-
-#ifdef HAVE_HIDDEN
-#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
-#else
-#  define ZLIB_INTERNAL
-#endif
-
-#include <stdio.h>
-#include "zlib.h"
-#ifdef STDC
-#  include <string.h>
-#  include <stdlib.h>
-#  include <limits.h>
-#endif
-
-#ifndef _POSIX_SOURCE
-#  define _POSIX_SOURCE
-#endif
-#include <fcntl.h>
-
-#ifdef _WIN32
-#  include <stddef.h>
-#endif
-
-#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
-#  include <io.h>
-#else
-#  include <unistd.h>
-#endif
-
-#if defined(_WIN32) || defined(__CYGWIN__)
-#  define WIDECHAR
-#endif
-
-#ifdef WINAPI_FAMILY
-#  define open _open
-#  define read _read
-#  define write _write
-#  define close _close
-#endif
-
-#ifdef NO_DEFLATE       /* for compatibility with old definition */
-#  define NO_GZCOMPRESS
-#endif
-
-#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
-#  ifndef HAVE_VSNPRINTF
-#    define HAVE_VSNPRINTF
-#  endif
-#endif
-
-#if defined(__CYGWIN__)
-#  ifndef HAVE_VSNPRINTF
-#    define HAVE_VSNPRINTF
-#  endif
-#endif
-
-#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
-#  ifndef HAVE_VSNPRINTF
-#    define HAVE_VSNPRINTF
-#  endif
-#endif
-
-#ifndef HAVE_VSNPRINTF
-#  ifdef MSDOS
-/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
-   but for now we just assume it doesn't. */
-#    define NO_vsnprintf
-#  endif
-#  ifdef __TURBOC__
-#    define NO_vsnprintf
-#  endif
-#  ifdef WIN32
-/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
-#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
-#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
-#         define vsnprintf _vsnprintf
-#      endif
-#    endif
-#  endif
-#  ifdef __SASC
-#    define NO_vsnprintf
-#  endif
-#  ifdef VMS
-#    define NO_vsnprintf
-#  endif
-#  ifdef __OS400__
-#    define NO_vsnprintf
-#  endif
-#  ifdef __MVS__
-#    define NO_vsnprintf
-#  endif
-#endif
-
-/* unlike snprintf (which is required in C99), _snprintf does not guarantee
-   null termination of the result -- however this is only used in gzlib.c where
-   the result is assured to fit in the space provided */
-#if defined(_MSC_VER) && _MSC_VER < 1900
-#  define snprintf _snprintf
-#endif
-
-#ifndef local
-#  define local static
-#endif
-/* since "static" is used to mean two completely different things in C, we
-   define "local" for the non-static meaning of "static", for readability
-   (compile with -Dlocal if your debugger can't find static symbols) */
-
-/* gz* functions always use library allocation functions */
-#ifndef STDC
-  extern voidp  malloc OF((uInt size));
-  extern void   free   OF((voidpf ptr));
-#endif
-
-/* get errno and strerror definition */
-#if defined UNDER_CE
-#  include <windows.h>
-#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
-#else
-#  ifndef NO_STRERROR
-#    include <errno.h>
-#    define zstrerror() strerror(errno)
-#  else
-#    define zstrerror() "stdio error (consult errno)"
-#  endif
-#endif
-
-/* provide prototypes for these when building zlib without LFS */
-#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
-    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
-#endif
-
-/* default memLevel */
-#if MAX_MEM_LEVEL >= 8
-#  define DEF_MEM_LEVEL 8
-#else
-#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
-#endif
-
-/* default i/o buffer size -- double this for output when reading (this and
-   twice this must be able to fit in an unsigned type) */
-#define GZBUFSIZE 8192
-
-/* gzip modes, also provide a little integrity check on the passed structure */
-#define GZ_NONE 0
-#define GZ_READ 7247
-#define GZ_WRITE 31153
-#define GZ_APPEND 1     /* mode set to GZ_WRITE after the file is opened */
-
-/* values for gz_state how */
-#define LOOK 0      /* look for a gzip header */
-#define COPY 1      /* copy input directly */
-#define GZIP 2      /* decompress a gzip stream */
-
-/* internal gzip file state data structure */
-typedef struct {
-        /* exposed contents for gzgetc() macro */
-    struct gzFile_s x;      /* "x" for exposed */
-                            /* x.have: number of bytes available at x.next */
-                            /* x.next: next output data to deliver or write */
-                            /* x.pos: current position in uncompressed data */
-        /* used for both reading and writing */
-    int mode;               /* see gzip modes above */
-    int fd;                 /* file descriptor */
-    char *path;             /* path or fd for error messages */
-    unsigned size;          /* buffer size, zero if not allocated yet */
-    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
-    unsigned char *in;      /* input buffer (double-sized when writing) */
-    unsigned char *out;     /* output buffer (double-sized when reading) */
-    int direct;             /* 0 if processing gzip, 1 if transparent */
-        /* just for reading */
-    int how;                /* 0: get header, 1: copy, 2: decompress */
-    z_off64_t start;        /* where the gzip data started, for rewinding */
-    int eof;                /* true if end of input file reached */
-    int past;               /* true if read requested past end */
-        /* just for writing */
-    int level;              /* compression level */
-    int strategy;           /* compression strategy */
-        /* seek request */
-    z_off64_t skip;         /* amount to skip (already rewound if backwards) */
-    int seek;               /* true if seek request pending */
-        /* error information */
-    int err;                /* error code */
-    char *msg;              /* error message */
-        /* zlib inflate or deflate stream */
-    z_stream strm;          /* stream structure in-place (not a pointer) */
-} gz_state;
-typedef gz_state FAR *gz_statep;
-
-/* shared functions */
-void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
-#if defined UNDER_CE
-char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
-#endif
-
-/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
-   value -- needed when comparing unsigned to z_off64_t, which is signed
-   (possible z_off64_t types off_t, off64_t, and long are all signed) */
-#ifdef INT_MAX
-#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
-#else
-unsigned ZLIB_INTERNAL gz_intmax OF((void));
-#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
-#endif
diff --git a/dep/zlib/src/gzlib.c b/dep/zlib/src/gzlib.c
deleted file mode 100644
index 4105e6aff..000000000
--- a/dep/zlib/src/gzlib.c
+++ /dev/null
@@ -1,637 +0,0 @@
-/* gzlib.c -- zlib functions common to reading and writing gzip files
- * Copyright (C) 2004-2017 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "gzguts.h"
-
-#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__)
-#  define LSEEK _lseeki64
-#else
-#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
-#  define LSEEK lseek64
-#else
-#  define LSEEK lseek
-#endif
-#endif
-
-/* Local functions */
-local void gz_reset OF((gz_statep));
-local gzFile gz_open OF((const void *, int, const char *));
-
-#if defined UNDER_CE
-
-/* Map the Windows error number in ERROR to a locale-dependent error message
-   string and return a pointer to it.  Typically, the values for ERROR come
-   from GetLastError.
-
-   The string pointed to shall not be modified by the application, but may be
-   overwritten by a subsequent call to gz_strwinerror
-
-   The gz_strwinerror function does not change the current setting of
-   GetLastError. */
-char ZLIB_INTERNAL *gz_strwinerror (error)
-     DWORD error;
-{
-    static char buf[1024];
-
-    wchar_t *msgbuf;
-    DWORD lasterr = GetLastError();
-    DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
-        | FORMAT_MESSAGE_ALLOCATE_BUFFER,
-        NULL,
-        error,
-        0, /* Default language */
-        (LPVOID)&msgbuf,
-        0,
-        NULL);
-    if (chars != 0) {
-        /* If there is an \r\n appended, zap it.  */
-        if (chars >= 2
-            && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
-            chars -= 2;
-            msgbuf[chars] = 0;
-        }
-
-        if (chars > sizeof (buf) - 1) {
-            chars = sizeof (buf) - 1;
-            msgbuf[chars] = 0;
-        }
-
-        wcstombs(buf, msgbuf, chars + 1);
-        LocalFree(msgbuf);
-    }
-    else {
-        sprintf(buf, "unknown win32 error (%ld)", error);
-    }
-
-    SetLastError(lasterr);
-    return buf;
-}
-
-#endif /* UNDER_CE */
-
-/* Reset gzip file state */
-local void gz_reset(state)
-    gz_statep state;
-{
-    state->x.have = 0;              /* no output data available */
-    if (state->mode == GZ_READ) {   /* for reading ... */
-        state->eof = 0;             /* not at end of file */
-        state->past = 0;            /* have not read past end yet */
-        state->how = LOOK;          /* look for gzip header */
-    }
-    state->seek = 0;                /* no seek request pending */
-    gz_error(state, Z_OK, NULL);    /* clear error */
-    state->x.pos = 0;               /* no uncompressed data yet */
-    state->strm.avail_in = 0;       /* no input data yet */
-}
-
-/* Open a gzip file either by name or file descriptor. */
-local gzFile gz_open(path, fd, mode)
-    const void *path;
-    int fd;
-    const char *mode;
-{
-    gz_statep state;
-    z_size_t len;
-    int oflag;
-#ifdef O_CLOEXEC
-    int cloexec = 0;
-#endif
-#ifdef O_EXCL
-    int exclusive = 0;
-#endif
-
-    /* check input */
-    if (path == NULL)
-        return NULL;
-
-    /* allocate gzFile structure to return */
-    state = (gz_statep)malloc(sizeof(gz_state));
-    if (state == NULL)
-        return NULL;
-    state->size = 0;            /* no buffers allocated yet */
-    state->want = GZBUFSIZE;    /* requested buffer size */
-    state->msg = NULL;          /* no error message yet */
-
-    /* interpret mode */
-    state->mode = GZ_NONE;
-    state->level = Z_DEFAULT_COMPRESSION;
-    state->strategy = Z_DEFAULT_STRATEGY;
-    state->direct = 0;
-    while (*mode) {
-        if (*mode >= '0' && *mode <= '9')
-            state->level = *mode - '0';
-        else
-            switch (*mode) {
-            case 'r':
-                state->mode = GZ_READ;
-                break;
-#ifndef NO_GZCOMPRESS
-            case 'w':
-                state->mode = GZ_WRITE;
-                break;
-            case 'a':
-                state->mode = GZ_APPEND;
-                break;
-#endif
-            case '+':       /* can't read and write at the same time */
-                free(state);
-                return NULL;
-            case 'b':       /* ignore -- will request binary anyway */
-                break;
-#ifdef O_CLOEXEC
-            case 'e':
-                cloexec = 1;
-                break;
-#endif
-#ifdef O_EXCL
-            case 'x':
-                exclusive = 1;
-                break;
-#endif
-            case 'f':
-                state->strategy = Z_FILTERED;
-                break;
-            case 'h':
-                state->strategy = Z_HUFFMAN_ONLY;
-                break;
-            case 'R':
-                state->strategy = Z_RLE;
-                break;
-            case 'F':
-                state->strategy = Z_FIXED;
-                break;
-            case 'T':
-                state->direct = 1;
-                break;
-            default:        /* could consider as an error, but just ignore */
-                ;
-            }
-        mode++;
-    }
-
-    /* must provide an "r", "w", or "a" */
-    if (state->mode == GZ_NONE) {
-        free(state);
-        return NULL;
-    }
-
-    /* can't force transparent read */
-    if (state->mode == GZ_READ) {
-        if (state->direct) {
-            free(state);
-            return NULL;
-        }
-        state->direct = 1;      /* for empty file */
-    }
-
-    /* save the path name for error messages */
-#ifdef WIDECHAR
-    if (fd == -2) {
-        len = wcstombs(NULL, path, 0);
-        if (len == (z_size_t)-1)
-            len = 0;
-    }
-    else
-#endif
-        len = strlen((const char *)path);
-    state->path = (char *)malloc(len + 1);
-    if (state->path == NULL) {
-        free(state);
-        return NULL;
-    }
-#ifdef WIDECHAR
-    if (fd == -2)
-        if (len)
-            wcstombs(state->path, path, len + 1);
-        else
-            *(state->path) = 0;
-    else
-#endif
-#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-        (void)snprintf(state->path, len + 1, "%s", (const char *)path);
-#else
-        strcpy(state->path, path);
-#endif
-
-    /* compute the flags for open() */
-    oflag =
-#ifdef O_LARGEFILE
-        O_LARGEFILE |
-#endif
-#ifdef O_BINARY
-        O_BINARY |
-#endif
-#ifdef O_CLOEXEC
-        (cloexec ? O_CLOEXEC : 0) |
-#endif
-        (state->mode == GZ_READ ?
-         O_RDONLY :
-         (O_WRONLY | O_CREAT |
-#ifdef O_EXCL
-          (exclusive ? O_EXCL : 0) |
-#endif
-          (state->mode == GZ_WRITE ?
-           O_TRUNC :
-           O_APPEND)));
-
-    /* open the file with the appropriate flags (or just use fd) */
-    state->fd = fd > -1 ? fd : (
-#ifdef WIDECHAR
-        fd == -2 ? _wopen(path, oflag, 0666) :
-#endif
-        open((const char *)path, oflag, 0666));
-    if (state->fd == -1) {
-        free(state->path);
-        free(state);
-        return NULL;
-    }
-    if (state->mode == GZ_APPEND) {
-        LSEEK(state->fd, 0, SEEK_END);  /* so gzoffset() is correct */
-        state->mode = GZ_WRITE;         /* simplify later checks */
-    }
-
-    /* save the current position for rewinding (only if reading) */
-    if (state->mode == GZ_READ) {
-        state->start = LSEEK(state->fd, 0, SEEK_CUR);
-        if (state->start == -1) state->start = 0;
-    }
-
-    /* initialize stream */
-    gz_reset(state);
-
-    /* return stream */
-    return (gzFile)state;
-}
-
-/* -- see zlib.h -- */
-gzFile ZEXPORT gzopen(path, mode)
-    const char *path;
-    const char *mode;
-{
-    return gz_open(path, -1, mode);
-}
-
-/* -- see zlib.h -- */
-gzFile ZEXPORT gzopen64(path, mode)
-    const char *path;
-    const char *mode;
-{
-    return gz_open(path, -1, mode);
-}
-
-/* -- see zlib.h -- */
-gzFile ZEXPORT gzdopen(fd, mode)
-    int fd;
-    const char *mode;
-{
-    char *path;         /* identifier for error messages */
-    gzFile gz;
-
-    if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
-        return NULL;
-#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-    (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd);
-#else
-    sprintf(path, "<fd:%d>", fd);   /* for debugging */
-#endif
-    gz = gz_open(path, fd, mode);
-    free(path);
-    return gz;
-}
-
-/* -- see zlib.h -- */
-#ifdef WIDECHAR
-gzFile ZEXPORT gzopen_w(path, mode)
-    const wchar_t *path;
-    const char *mode;
-{
-    return gz_open(path, -2, mode);
-}
-#endif
-
-/* -- see zlib.h -- */
-int ZEXPORT gzbuffer(file, size)
-    gzFile file;
-    unsigned size;
-{
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return -1;
-
-    /* make sure we haven't already allocated memory */
-    if (state->size != 0)
-        return -1;
-
-    /* check and set requested size */
-    if ((size << 1) < size)
-        return -1;              /* need to be able to double it */
-    if (size < 2)
-        size = 2;               /* need two bytes to check magic header */
-    state->want = size;
-    return 0;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzrewind(file)
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no error */
-    if (state->mode != GZ_READ ||
-            (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return -1;
-
-    /* back up and start over */
-    if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
-        return -1;
-    gz_reset(state);
-    return 0;
-}
-
-/* -- see zlib.h -- */
-z_off64_t ZEXPORT gzseek64(file, offset, whence)
-    gzFile file;
-    z_off64_t offset;
-    int whence;
-{
-    unsigned n;
-    z_off64_t ret;
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return -1;
-
-    /* check that there's no error */
-    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
-        return -1;
-
-    /* can only seek from start or relative to current position */
-    if (whence != SEEK_SET && whence != SEEK_CUR)
-        return -1;
-
-    /* normalize offset to a SEEK_CUR specification */
-    if (whence == SEEK_SET)
-        offset -= state->x.pos;
-    else if (state->seek)
-        offset += state->skip;
-    state->seek = 0;
-
-    /* if within raw area while reading, just go there */
-    if (state->mode == GZ_READ && state->how == COPY &&
-            state->x.pos + offset >= 0) {
-        ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR);
-        if (ret == -1)
-            return -1;
-        state->x.have = 0;
-        state->eof = 0;
-        state->past = 0;
-        state->seek = 0;
-        gz_error(state, Z_OK, NULL);
-        state->strm.avail_in = 0;
-        state->x.pos += offset;
-        return state->x.pos;
-    }
-
-    /* calculate skip amount, rewinding if needed for back seek when reading */
-    if (offset < 0) {
-        if (state->mode != GZ_READ)         /* writing -- can't go backwards */
-            return -1;
-        offset += state->x.pos;
-        if (offset < 0)                     /* before start of file! */
-            return -1;
-        if (gzrewind(file) == -1)           /* rewind, then skip to offset */
-            return -1;
-    }
-
-    /* if reading, skip what's in output buffer (one less gzgetc() check) */
-    if (state->mode == GZ_READ) {
-        n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ?
-            (unsigned)offset : state->x.have;
-        state->x.have -= n;
-        state->x.next += n;
-        state->x.pos += n;
-        offset -= n;
-    }
-
-    /* request skip (if not zero) */
-    if (offset) {
-        state->seek = 1;
-        state->skip = offset;
-    }
-    return state->x.pos + offset;
-}
-
-/* -- see zlib.h -- */
-z_off_t ZEXPORT gzseek(file, offset, whence)
-    gzFile file;
-    z_off_t offset;
-    int whence;
-{
-    z_off64_t ret;
-
-    ret = gzseek64(file, (z_off64_t)offset, whence);
-    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
-}
-
-/* -- see zlib.h -- */
-z_off64_t ZEXPORT gztell64(file)
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return -1;
-
-    /* return position */
-    return state->x.pos + (state->seek ? state->skip : 0);
-}
-
-/* -- see zlib.h -- */
-z_off_t ZEXPORT gztell(file)
-    gzFile file;
-{
-    z_off64_t ret;
-
-    ret = gztell64(file);
-    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
-}
-
-/* -- see zlib.h -- */
-z_off64_t ZEXPORT gzoffset64(file)
-    gzFile file;
-{
-    z_off64_t offset;
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return -1;
-
-    /* compute and return effective offset in file */
-    offset = LSEEK(state->fd, 0, SEEK_CUR);
-    if (offset == -1)
-        return -1;
-    if (state->mode == GZ_READ)             /* reading */
-        offset -= state->strm.avail_in;     /* don't count buffered input */
-    return offset;
-}
-
-/* -- see zlib.h -- */
-z_off_t ZEXPORT gzoffset(file)
-    gzFile file;
-{
-    z_off64_t ret;
-
-    ret = gzoffset64(file);
-    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzeof(file)
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return 0;
-
-    /* return end-of-file state */
-    return state->mode == GZ_READ ? state->past : 0;
-}
-
-/* -- see zlib.h -- */
-const char * ZEXPORT gzerror(file, errnum)
-    gzFile file;
-    int *errnum;
-{
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return NULL;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return NULL;
-
-    /* return error information */
-    if (errnum != NULL)
-        *errnum = state->err;
-    return state->err == Z_MEM_ERROR ? "out of memory" :
-                                       (state->msg == NULL ? "" : state->msg);
-}
-
-/* -- see zlib.h -- */
-void ZEXPORT gzclearerr(file)
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure and check integrity */
-    if (file == NULL)
-        return;
-    state = (gz_statep)file;
-    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
-        return;
-
-    /* clear error and end-of-file */
-    if (state->mode == GZ_READ) {
-        state->eof = 0;
-        state->past = 0;
-    }
-    gz_error(state, Z_OK, NULL);
-}
-
-/* Create an error message in allocated memory and set state->err and
-   state->msg accordingly.  Free any previous error message already there.  Do
-   not try to free or allocate space if the error is Z_MEM_ERROR (out of
-   memory).  Simply save the error message as a static string.  If there is an
-   allocation failure constructing the error message, then convert the error to
-   out of memory. */
-void ZLIB_INTERNAL gz_error(state, err, msg)
-    gz_statep state;
-    int err;
-    const char *msg;
-{
-    /* free previously allocated message and clear */
-    if (state->msg != NULL) {
-        if (state->err != Z_MEM_ERROR)
-            free(state->msg);
-        state->msg = NULL;
-    }
-
-    /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */
-    if (err != Z_OK && err != Z_BUF_ERROR)
-        state->x.have = 0;
-
-    /* set error code, and if no message, then done */
-    state->err = err;
-    if (msg == NULL)
-        return;
-
-    /* for an out of memory error, return literal string when requested */
-    if (err == Z_MEM_ERROR)
-        return;
-
-    /* construct error message with path */
-    if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) ==
-            NULL) {
-        state->err = Z_MEM_ERROR;
-        return;
-    }
-#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-    (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3,
-                   "%s%s%s", state->path, ": ", msg);
-#else
-    strcpy(state->msg, state->path);
-    strcat(state->msg, ": ");
-    strcat(state->msg, msg);
-#endif
-}
-
-#ifndef INT_MAX
-/* portably return maximum value for an int (when limits.h presumed not
-   available) -- we need to do this to cover cases where 2's complement not
-   used, since C standard permits 1's complement and sign-bit representations,
-   otherwise we could just use ((unsigned)-1) >> 1 */
-unsigned ZLIB_INTERNAL gz_intmax()
-{
-    unsigned p, q;
-
-    p = 1;
-    do {
-        q = p;
-        p <<= 1;
-        p++;
-    } while (p > q);
-    return q >> 1;
-}
-#endif
diff --git a/dep/zlib/src/gzread.c b/dep/zlib/src/gzread.c
deleted file mode 100644
index 956b91ea7..000000000
--- a/dep/zlib/src/gzread.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/* gzread.c -- zlib functions for reading gzip files
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "gzguts.h"
-
-/* Local functions */
-local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
-local int gz_avail OF((gz_statep));
-local int gz_look OF((gz_statep));
-local int gz_decomp OF((gz_statep));
-local int gz_fetch OF((gz_statep));
-local int gz_skip OF((gz_statep, z_off64_t));
-local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
-
-/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
-   state->fd, and update state->eof, state->err, and state->msg as appropriate.
-   This function needs to loop on read(), since read() is not guaranteed to
-   read the number of bytes requested, depending on the type of descriptor. */
-local int gz_load(state, buf, len, have)
-    gz_statep state;
-    unsigned char *buf;
-    unsigned len;
-    unsigned *have;
-{
-    int ret;
-    unsigned get, max = ((unsigned)-1 >> 2) + 1;
-
-    *have = 0;
-    do {
-        get = len - *have;
-        if (get > max)
-            get = max;
-        ret = read(state->fd, buf + *have, get);
-        if (ret <= 0)
-            break;
-        *have += (unsigned)ret;
-    } while (*have < len);
-    if (ret < 0) {
-        gz_error(state, Z_ERRNO, zstrerror());
-        return -1;
-    }
-    if (ret == 0)
-        state->eof = 1;
-    return 0;
-}
-
-/* Load up input buffer and set eof flag if last data loaded -- return -1 on
-   error, 0 otherwise.  Note that the eof flag is set when the end of the input
-   file is reached, even though there may be unused data in the buffer.  Once
-   that data has been used, no more attempts will be made to read the file.
-   If strm->avail_in != 0, then the current data is moved to the beginning of
-   the input buffer, and then the remainder of the buffer is loaded with the
-   available data from the input file. */
-local int gz_avail(state)
-    gz_statep state;
-{
-    unsigned got;
-    z_streamp strm = &(state->strm);
-
-    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
-        return -1;
-    if (state->eof == 0) {
-        if (strm->avail_in) {       /* copy what's there to the start */
-            unsigned char *p = state->in;
-            unsigned const char *q = strm->next_in;
-            unsigned n = strm->avail_in;
-            do {
-                *p++ = *q++;
-            } while (--n);
-        }
-        if (gz_load(state, state->in + strm->avail_in,
-                    state->size - strm->avail_in, &got) == -1)
-            return -1;
-        strm->avail_in += got;
-        strm->next_in = state->in;
-    }
-    return 0;
-}
-
-/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
-   If this is the first time in, allocate required memory.  state->how will be
-   left unchanged if there is no more input data available, will be set to COPY
-   if there is no gzip header and direct copying will be performed, or it will
-   be set to GZIP for decompression.  If direct copying, then leftover input
-   data from the input buffer will be copied to the output buffer.  In that
-   case, all further file reads will be directly to either the output buffer or
-   a user buffer.  If decompressing, the inflate state will be initialized.
-   gz_look() will return 0 on success or -1 on failure. */
-local int gz_look(state)
-    gz_statep state;
-{
-    z_streamp strm = &(state->strm);
-
-    /* allocate read buffers and inflate memory */
-    if (state->size == 0) {
-        /* allocate buffers */
-        state->in = (unsigned char *)malloc(state->want);
-        state->out = (unsigned char *)malloc(state->want << 1);
-        if (state->in == NULL || state->out == NULL) {
-            free(state->out);
-            free(state->in);
-            gz_error(state, Z_MEM_ERROR, "out of memory");
-            return -1;
-        }
-        state->size = state->want;
-
-        /* allocate inflate memory */
-        state->strm.zalloc = Z_NULL;
-        state->strm.zfree = Z_NULL;
-        state->strm.opaque = Z_NULL;
-        state->strm.avail_in = 0;
-        state->strm.next_in = Z_NULL;
-        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
-            free(state->out);
-            free(state->in);
-            state->size = 0;
-            gz_error(state, Z_MEM_ERROR, "out of memory");
-            return -1;
-        }
-    }
-
-    /* get at least the magic bytes in the input buffer */
-    if (strm->avail_in < 2) {
-        if (gz_avail(state) == -1)
-            return -1;
-        if (strm->avail_in == 0)
-            return 0;
-    }
-
-    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
-       a logical dilemma here when considering the case of a partially written
-       gzip file, to wit, if a single 31 byte is written, then we cannot tell
-       whether this is a single-byte file, or just a partially written gzip
-       file -- for here we assume that if a gzip file is being written, then
-       the header will be written in a single operation, so that reading a
-       single byte is sufficient indication that it is not a gzip file) */
-    if (strm->avail_in > 1 &&
-            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
-        inflateReset(strm);
-        state->how = GZIP;
-        state->direct = 0;
-        return 0;
-    }
-
-    /* no gzip header -- if we were decoding gzip before, then this is trailing
-       garbage.  Ignore the trailing garbage and finish. */
-    if (state->direct == 0) {
-        strm->avail_in = 0;
-        state->eof = 1;
-        state->x.have = 0;
-        return 0;
-    }
-
-    /* doing raw i/o, copy any leftover input to output -- this assumes that
-       the output buffer is larger than the input buffer, which also assures
-       space for gzungetc() */
-    state->x.next = state->out;
-    if (strm->avail_in) {
-        memcpy(state->x.next, strm->next_in, strm->avail_in);
-        state->x.have = strm->avail_in;
-        strm->avail_in = 0;
-    }
-    state->how = COPY;
-    state->direct = 1;
-    return 0;
-}
-
-/* Decompress from input to the provided next_out and avail_out in the state.
-   On return, state->x.have and state->x.next point to the just decompressed
-   data.  If the gzip stream completes, state->how is reset to LOOK to look for
-   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
-   on success, -1 on failure. */
-local int gz_decomp(state)
-    gz_statep state;
-{
-    int ret = Z_OK;
-    unsigned had;
-    z_streamp strm = &(state->strm);
-
-    /* fill output buffer up to end of deflate stream */
-    had = strm->avail_out;
-    do {
-        /* get more input for inflate() */
-        if (strm->avail_in == 0 && gz_avail(state) == -1)
-            return -1;
-        if (strm->avail_in == 0) {
-            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
-            break;
-        }
-
-        /* decompress and handle errors */
-        ret = inflate(strm, Z_NO_FLUSH);
-        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
-            gz_error(state, Z_STREAM_ERROR,
-                     "internal error: inflate stream corrupt");
-            return -1;
-        }
-        if (ret == Z_MEM_ERROR) {
-            gz_error(state, Z_MEM_ERROR, "out of memory");
-            return -1;
-        }
-        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
-            gz_error(state, Z_DATA_ERROR,
-                     strm->msg == NULL ? "compressed data error" : strm->msg);
-            return -1;
-        }
-    } while (strm->avail_out && ret != Z_STREAM_END);
-
-    /* update available output */
-    state->x.have = had - strm->avail_out;
-    state->x.next = strm->next_out - state->x.have;
-
-    /* if the gzip stream completed successfully, look for another */
-    if (ret == Z_STREAM_END)
-        state->how = LOOK;
-
-    /* good decompression */
-    return 0;
-}
-
-/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
-   Data is either copied from the input file or decompressed from the input
-   file depending on state->how.  If state->how is LOOK, then a gzip header is
-   looked for to determine whether to copy or decompress.  Returns -1 on error,
-   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
-   end of the input file has been reached and all data has been processed.  */
-local int gz_fetch(state)
-    gz_statep state;
-{
-    z_streamp strm = &(state->strm);
-
-    do {
-        switch(state->how) {
-        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
-            if (gz_look(state) == -1)
-                return -1;
-            if (state->how == LOOK)
-                return 0;
-            break;
-        case COPY:      /* -> COPY */
-            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
-                    == -1)
-                return -1;
-            state->x.next = state->out;
-            return 0;
-        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
-            strm->avail_out = state->size << 1;
-            strm->next_out = state->out;
-            if (gz_decomp(state) == -1)
-                return -1;
-        }
-    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
-    return 0;
-}
-
-/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
-local int gz_skip(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
-    unsigned n;
-
-    /* skip over len bytes or reach end-of-file, whichever comes first */
-    while (len)
-        /* skip over whatever is in output buffer */
-        if (state->x.have) {
-            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
-                (unsigned)len : state->x.have;
-            state->x.have -= n;
-            state->x.next += n;
-            state->x.pos += n;
-            len -= n;
-        }
-
-        /* output buffer empty -- return if we're at the end of the input */
-        else if (state->eof && state->strm.avail_in == 0)
-            break;
-
-        /* need more data to skip -- load up output buffer */
-        else {
-            /* get more output, looking for header if required */
-            if (gz_fetch(state) == -1)
-                return -1;
-        }
-    return 0;
-}
-
-/* Read len bytes into buf from file, or less than len up to the end of the
-   input.  Return the number of bytes read.  If zero is returned, either the
-   end of file was reached, or there was an error.  state->err must be
-   consulted in that case to determine which. */
-local z_size_t gz_read(state, buf, len)
-    gz_statep state;
-    voidp buf;
-    z_size_t len;
-{
-    z_size_t got;
-    unsigned n;
-
-    /* if len is zero, avoid unnecessary operations */
-    if (len == 0)
-        return 0;
-
-    /* process a skip request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_skip(state, state->skip) == -1)
-            return 0;
-    }
-
-    /* get len bytes to buf, or less than len if at the end */
-    got = 0;
-    do {
-        /* set n to the maximum amount of len that fits in an unsigned int */
-        n = -1;
-        if (n > len)
-            n = len;
-
-        /* first just try copying data from the output buffer */
-        if (state->x.have) {
-            if (state->x.have < n)
-                n = state->x.have;
-            memcpy(buf, state->x.next, n);
-            state->x.next += n;
-            state->x.have -= n;
-        }
-
-        /* output buffer empty -- return if we're at the end of the input */
-        else if (state->eof && state->strm.avail_in == 0) {
-            state->past = 1;        /* tried to read past end */
-            break;
-        }
-
-        /* need output data -- for small len or new stream load up our output
-           buffer */
-        else if (state->how == LOOK || n < (state->size << 1)) {
-            /* get more output, looking for header if required */
-            if (gz_fetch(state) == -1)
-                return 0;
-            continue;       /* no progress yet -- go back to copy above */
-            /* the copy above assures that we will leave with space in the
-               output buffer, allowing at least one gzungetc() to succeed */
-        }
-
-        /* large len -- read directly into user buffer */
-        else if (state->how == COPY) {      /* read directly */
-            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
-                return 0;
-        }
-
-        /* large len -- decompress directly into user buffer */
-        else {  /* state->how == GZIP */
-            state->strm.avail_out = n;
-            state->strm.next_out = (unsigned char *)buf;
-            if (gz_decomp(state) == -1)
-                return 0;
-            n = state->x.have;
-            state->x.have = 0;
-        }
-
-        /* update progress */
-        len -= n;
-        buf = (char *)buf + n;
-        got += n;
-        state->x.pos += n;
-    } while (len);
-
-    /* return number of bytes read into user buffer */
-    return got;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state->mode != GZ_READ ||
-            (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return -1;
-
-    /* since an int is returned, make sure len fits in one, otherwise return
-       with an error (this avoids a flaw in the interface) */
-    if ((int)len < 0) {
-        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
-        return -1;
-    }
-
-    /* read len or fewer bytes to buf */
-    len = gz_read(state, buf, len);
-
-    /* check for an error */
-    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
-        return -1;
-
-    /* return the number of bytes read (this is assured to fit in an int) */
-    return (int)len;
-}
-
-/* -- see zlib.h -- */
-z_size_t ZEXPORT gzfread(buf, size, nitems, file)
-    voidp buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
-    z_size_t len;
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state->mode != GZ_READ ||
-            (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return 0;
-
-    /* compute bytes to read -- error on overflow */
-    len = nitems * size;
-    if (size && len / size != nitems) {
-        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
-        return 0;
-    }
-
-    /* read len or fewer bytes to buf, return the number of full items read */
-    return len ? gz_read(state, buf, len) / size : 0;
-}
-
-/* -- see zlib.h -- */
-#ifdef Z_PREFIX_SET
-#  undef z_gzgetc
-#else
-#  undef gzgetc
-#endif
-int ZEXPORT gzgetc(file)
-    gzFile file;
-{
-    int ret;
-    unsigned char buf[1];
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state->mode != GZ_READ ||
-        (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return -1;
-
-    /* try output buffer (no need to check for skip request) */
-    if (state->x.have) {
-        state->x.have--;
-        state->x.pos++;
-        return *(state->x.next)++;
-    }
-
-    /* nothing there -- try gz_read() */
-    ret = gz_read(state, buf, 1);
-    return ret < 1 ? -1 : buf[0];
-}
-
-int ZEXPORT gzgetc_(file)
-gzFile file;
-{
-    return gzgetc(file);
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzungetc(c, file)
-    int c;
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state->mode != GZ_READ ||
-        (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return -1;
-
-    /* process a skip request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_skip(state, state->skip) == -1)
-            return -1;
-    }
-
-    /* can't push EOF */
-    if (c < 0)
-        return -1;
-
-    /* if output buffer empty, put byte at end (allows more pushing) */
-    if (state->x.have == 0) {
-        state->x.have = 1;
-        state->x.next = state->out + (state->size << 1) - 1;
-        state->x.next[0] = (unsigned char)c;
-        state->x.pos--;
-        state->past = 0;
-        return c;
-    }
-
-    /* if no room, give up (must have already done a gzungetc()) */
-    if (state->x.have == (state->size << 1)) {
-        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
-        return -1;
-    }
-
-    /* slide output data if needed and insert byte before existing data */
-    if (state->x.next == state->out) {
-        unsigned char *src = state->out + state->x.have;
-        unsigned char *dest = state->out + (state->size << 1);
-        while (src > state->out)
-            *--dest = *--src;
-        state->x.next = dest;
-    }
-    state->x.have++;
-    state->x.next--;
-    state->x.next[0] = (unsigned char)c;
-    state->x.pos--;
-    state->past = 0;
-    return c;
-}
-
-/* -- see zlib.h -- */
-char * ZEXPORT gzgets(file, buf, len)
-    gzFile file;
-    char *buf;
-    int len;
-{
-    unsigned left, n;
-    char *str;
-    unsigned char *eol;
-    gz_statep state;
-
-    /* check parameters and get internal structure */
-    if (file == NULL || buf == NULL || len < 1)
-        return NULL;
-    state = (gz_statep)file;
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state->mode != GZ_READ ||
-        (state->err != Z_OK && state->err != Z_BUF_ERROR))
-        return NULL;
-
-    /* process a skip request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_skip(state, state->skip) == -1)
-            return NULL;
-    }
-
-    /* copy output bytes up to new line or len - 1, whichever comes first --
-       append a terminating zero to the string (we don't check for a zero in
-       the contents, let the user worry about that) */
-    str = buf;
-    left = (unsigned)len - 1;
-    if (left) do {
-        /* assure that something is in the output buffer */
-        if (state->x.have == 0 && gz_fetch(state) == -1)
-            return NULL;                /* error */
-        if (state->x.have == 0) {       /* end of file */
-            state->past = 1;            /* read past end */
-            break;                      /* return what we have */
-        }
-
-        /* look for end-of-line in current output buffer */
-        n = state->x.have > left ? left : state->x.have;
-        eol = (unsigned char *)memchr(state->x.next, '\n', n);
-        if (eol != NULL)
-            n = (unsigned)(eol - state->x.next) + 1;
-
-        /* copy through end-of-line, or remainder if not found */
-        memcpy(buf, state->x.next, n);
-        state->x.have -= n;
-        state->x.next += n;
-        state->x.pos += n;
-        left -= n;
-        buf += n;
-    } while (left && eol == NULL);
-
-    /* return terminated string, or if nothing, end of file */
-    if (buf == str)
-        return NULL;
-    buf[0] = 0;
-    return str;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzdirect(file)
-    gzFile file;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-
-    /* if the state is not known, but we can find out, then do so (this is
-       mainly for right after a gzopen() or gzdopen()) */
-    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
-        (void)gz_look(state);
-
-    /* return 1 if transparent, 0 if processing a gzip stream */
-    return state->direct;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzclose_r(file)
-    gzFile file;
-{
-    int ret, err;
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-
-    /* check that we're reading */
-    if (state->mode != GZ_READ)
-        return Z_STREAM_ERROR;
-
-    /* free memory and close file */
-    if (state->size) {
-        inflateEnd(&(state->strm));
-        free(state->out);
-        free(state->in);
-    }
-    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
-    gz_error(state, Z_OK, NULL);
-    free(state->path);
-    ret = close(state->fd);
-    free(state);
-    return ret ? Z_ERRNO : err;
-}
diff --git a/dep/zlib/src/gzwrite.c b/dep/zlib/src/gzwrite.c
deleted file mode 100644
index c7b5651d7..000000000
--- a/dep/zlib/src/gzwrite.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/* gzwrite.c -- zlib functions for writing gzip files
- * Copyright (C) 2004-2017 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "gzguts.h"
-
-/* Local functions */
-local int gz_init OF((gz_statep));
-local int gz_comp OF((gz_statep, int));
-local int gz_zero OF((gz_statep, z_off64_t));
-local z_size_t gz_write OF((gz_statep, voidpc, z_size_t));
-
-/* Initialize state for writing a gzip file.  Mark initialization by setting
-   state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
-   success. */
-local int gz_init(state)
-    gz_statep state;
-{
-    int ret;
-    z_streamp strm = &(state->strm);
-
-    /* allocate input buffer (double size for gzprintf) */
-    state->in = (unsigned char *)malloc(state->want << 1);
-    if (state->in == NULL) {
-        gz_error(state, Z_MEM_ERROR, "out of memory");
-        return -1;
-    }
-
-    /* only need output buffer and deflate state if compressing */
-    if (!state->direct) {
-        /* allocate output buffer */
-        state->out = (unsigned char *)malloc(state->want);
-        if (state->out == NULL) {
-            free(state->in);
-            gz_error(state, Z_MEM_ERROR, "out of memory");
-            return -1;
-        }
-
-        /* allocate deflate memory, set up for gzip compression */
-        strm->zalloc = Z_NULL;
-        strm->zfree = Z_NULL;
-        strm->opaque = Z_NULL;
-        ret = deflateInit2(strm, state->level, Z_DEFLATED,
-                           MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy);
-        if (ret != Z_OK) {
-            free(state->out);
-            free(state->in);
-            gz_error(state, Z_MEM_ERROR, "out of memory");
-            return -1;
-        }
-        strm->next_in = NULL;
-    }
-
-    /* mark state as initialized */
-    state->size = state->want;
-
-    /* initialize write buffer if compressing */
-    if (!state->direct) {
-        strm->avail_out = state->size;
-        strm->next_out = state->out;
-        state->x.next = strm->next_out;
-    }
-    return 0;
-}
-
-/* Compress whatever is at avail_in and next_in and write to the output file.
-   Return -1 if there is an error writing to the output file or if gz_init()
-   fails to allocate memory, otherwise 0.  flush is assumed to be a valid
-   deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
-   reset to start a new gzip stream.  If gz->direct is true, then simply write
-   to the output file without compressing, and ignore flush. */
-local int gz_comp(state, flush)
-    gz_statep state;
-    int flush;
-{
-    int ret, writ;
-    unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
-    z_streamp strm = &(state->strm);
-
-    /* allocate memory if this is the first time through */
-    if (state->size == 0 && gz_init(state) == -1)
-        return -1;
-
-    /* write directly if requested */
-    if (state->direct) {
-        while (strm->avail_in) {
-            put = strm->avail_in > max ? max : strm->avail_in;
-            writ = write(state->fd, strm->next_in, put);
-            if (writ < 0) {
-                gz_error(state, Z_ERRNO, zstrerror());
-                return -1;
-            }
-            strm->avail_in -= (unsigned)writ;
-            strm->next_in += writ;
-        }
-        return 0;
-    }
-
-    /* run deflate() on provided input until it produces no more output */
-    ret = Z_OK;
-    do {
-        /* write out current buffer contents if full, or if flushing, but if
-           doing Z_FINISH then don't write until we get to Z_STREAM_END */
-        if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
-            (flush != Z_FINISH || ret == Z_STREAM_END))) {
-            while (strm->next_out > state->x.next) {
-                put = strm->next_out - state->x.next > (int)max ? max :
-                      (unsigned)(strm->next_out - state->x.next);
-                writ = write(state->fd, state->x.next, put);
-                if (writ < 0) {
-                    gz_error(state, Z_ERRNO, zstrerror());
-                    return -1;
-                }
-                state->x.next += writ;
-            }
-            if (strm->avail_out == 0) {
-                strm->avail_out = state->size;
-                strm->next_out = state->out;
-                state->x.next = state->out;
-            }
-        }
-
-        /* compress */
-        have = strm->avail_out;
-        ret = deflate(strm, flush);
-        if (ret == Z_STREAM_ERROR) {
-            gz_error(state, Z_STREAM_ERROR,
-                      "internal error: deflate stream corrupt");
-            return -1;
-        }
-        have -= strm->avail_out;
-    } while (have);
-
-    /* if that completed a deflate stream, allow another to start */
-    if (flush == Z_FINISH)
-        deflateReset(strm);
-
-    /* all done, no errors */
-    return 0;
-}
-
-/* Compress len zeros to output.  Return -1 on a write error or memory
-   allocation failure by gz_comp(), or 0 on success. */
-local int gz_zero(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
-    int first;
-    unsigned n;
-    z_streamp strm = &(state->strm);
-
-    /* consume whatever's left in the input buffer */
-    if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
-        return -1;
-
-    /* compress len zeros (len guaranteed > 0) */
-    first = 1;
-    while (len) {
-        n = GT_OFF(state->size) || (z_off64_t)state->size > len ?
-            (unsigned)len : state->size;
-        if (first) {
-            memset(state->in, 0, n);
-            first = 0;
-        }
-        strm->avail_in = n;
-        strm->next_in = state->in;
-        state->x.pos += n;
-        if (gz_comp(state, Z_NO_FLUSH) == -1)
-            return -1;
-        len -= n;
-    }
-    return 0;
-}
-
-/* Write len bytes from buf to file.  Return the number of bytes written.  If
-   the returned value is less than len, then there was an error. */
-local z_size_t gz_write(state, buf, len)
-    gz_statep state;
-    voidpc buf;
-    z_size_t len;
-{
-    z_size_t put = len;
-
-    /* if len is zero, avoid unnecessary operations */
-    if (len == 0)
-        return 0;
-
-    /* allocate memory if this is the first time through */
-    if (state->size == 0 && gz_init(state) == -1)
-        return 0;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return 0;
-    }
-
-    /* for small len, copy to input buffer, otherwise compress directly */
-    if (len < state->size) {
-        /* copy to input buffer, compress when full */
-        do {
-            unsigned have, copy;
-
-            if (state->strm.avail_in == 0)
-                state->strm.next_in = state->in;
-            have = (unsigned)((state->strm.next_in + state->strm.avail_in) -
-                              state->in);
-            copy = state->size - have;
-            if (copy > len)
-                copy = len;
-            memcpy(state->in + have, buf, copy);
-            state->strm.avail_in += copy;
-            state->x.pos += copy;
-            buf = (const char *)buf + copy;
-            len -= copy;
-            if (len && gz_comp(state, Z_NO_FLUSH) == -1)
-                return 0;
-        } while (len);
-    }
-    else {
-        /* consume whatever's left in the input buffer */
-        if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
-            return 0;
-
-        /* directly compress user buffer to file */
-        state->strm.next_in = (z_const Bytef *)buf;
-        do {
-            unsigned n = (unsigned)-1;
-            if (n > len)
-                n = len;
-            state->strm.avail_in = n;
-            state->x.pos += n;
-            if (gz_comp(state, Z_NO_FLUSH) == -1)
-                return 0;
-            len -= n;
-        } while (len);
-    }
-
-    /* input was all buffered or compressed */
-    return put;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return 0;
-
-    /* since an int is returned, make sure len fits in one, otherwise return
-       with an error (this avoids a flaw in the interface) */
-    if ((int)len < 0) {
-        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
-        return 0;
-    }
-
-    /* write len bytes from buf (the return value will fit in an int) */
-    return (int)gz_write(state, buf, len);
-}
-
-/* -- see zlib.h -- */
-z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
-    voidpc buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
-    z_size_t len;
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return 0;
-
-    /* compute bytes to read -- error on overflow */
-    len = nitems * size;
-    if (size && len / size != nitems) {
-        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
-        return 0;
-    }
-
-    /* write len bytes to buf, return the number of full items written */
-    return len ? gz_write(state, buf, len) / size : 0;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzputc(file, c)
-    gzFile file;
-    int c;
-{
-    unsigned have;
-    unsigned char buf[1];
-    gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    strm = &(state->strm);
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return -1;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return -1;
-    }
-
-    /* try writing to input buffer for speed (state->size == 0 if buffer not
-       initialized) */
-    if (state->size) {
-        if (strm->avail_in == 0)
-            strm->next_in = state->in;
-        have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
-        if (have < state->size) {
-            state->in[have] = (unsigned char)c;
-            strm->avail_in++;
-            state->x.pos++;
-            return c & 0xff;
-        }
-    }
-
-    /* no room in buffer or not initialized, use gz_write() */
-    buf[0] = (unsigned char)c;
-    if (gz_write(state, buf, 1) != 1)
-        return -1;
-    return c & 0xff;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzputs(file, str)
-    gzFile file;
-    const char *str;
-{
-    int ret;
-    z_size_t len;
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return -1;
-
-    /* write string */
-    len = strlen(str);
-    ret = gz_write(state, str, len);
-    return ret == 0 && len != 0 ? -1 : ret;
-}
-
-#if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#include <stdarg.h>
-
-/* -- see zlib.h -- */
-int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
-{
-    int len;
-    unsigned left;
-    char *next;
-    gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-    strm = &(state->strm);
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return Z_STREAM_ERROR;
-
-    /* make sure we have some buffer space */
-    if (state->size == 0 && gz_init(state) == -1)
-        return state->err;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return state->err;
-    }
-
-    /* do the printf() into the input buffer, put length in len -- the input
-       buffer is double-sized just for this function, so there is guaranteed to
-       be state->size bytes available after the current contents */
-    if (strm->avail_in == 0)
-        strm->next_in = state->in;
-    next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in);
-    next[state->size - 1] = 0;
-#ifdef NO_vsnprintf
-#  ifdef HAS_vsprintf_void
-    (void)vsprintf(next, format, va);
-    for (len = 0; len < state->size; len++)
-        if (next[len] == 0) break;
-#  else
-    len = vsprintf(next, format, va);
-#  endif
-#else
-#  ifdef HAS_vsnprintf_void
-    (void)vsnprintf(next, state->size, format, va);
-    len = strlen(next);
-#  else
-    len = vsnprintf(next, state->size, format, va);
-#  endif
-#endif
-
-    /* check that printf() results fit in buffer */
-    if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0)
-        return 0;
-
-    /* update buffer and position, compress first half if past that */
-    strm->avail_in += (unsigned)len;
-    state->x.pos += len;
-    if (strm->avail_in >= state->size) {
-        left = strm->avail_in - state->size;
-        strm->avail_in = state->size;
-        if (gz_comp(state, Z_NO_FLUSH) == -1)
-            return state->err;
-        memcpy(state->in, state->in + state->size, left);
-        strm->next_in = state->in;
-        strm->avail_in = left;
-    }
-    return len;
-}
-
-int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
-{
-    va_list va;
-    int ret;
-
-    va_start(va, format);
-    ret = gzvprintf(file, format, va);
-    va_end(va);
-    return ret;
-}
-
-#else /* !STDC && !Z_HAVE_STDARG_H */
-
-/* -- see zlib.h -- */
-int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
-    gzFile file;
-    const char *format;
-    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
-    unsigned len, left;
-    char *next;
-    gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-    strm = &(state->strm);
-
-    /* check that can really pass pointer in ints */
-    if (sizeof(int) != sizeof(void *))
-        return Z_STREAM_ERROR;
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return Z_STREAM_ERROR;
-
-    /* make sure we have some buffer space */
-    if (state->size == 0 && gz_init(state) == -1)
-        return state->error;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return state->error;
-    }
-
-    /* do the printf() into the input buffer, put length in len -- the input
-       buffer is double-sized just for this function, so there is guaranteed to
-       be state->size bytes available after the current contents */
-    if (strm->avail_in == 0)
-        strm->next_in = state->in;
-    next = (char *)(strm->next_in + strm->avail_in);
-    next[state->size - 1] = 0;
-#ifdef NO_snprintf
-#  ifdef HAS_sprintf_void
-    sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
-            a13, a14, a15, a16, a17, a18, a19, a20);
-    for (len = 0; len < size; len++)
-        if (next[len] == 0)
-            break;
-#  else
-    len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
-                  a12, a13, a14, a15, a16, a17, a18, a19, a20);
-#  endif
-#else
-#  ifdef HAS_snprintf_void
-    snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9,
-             a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-    len = strlen(next);
-#  else
-    len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8,
-                   a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-#  endif
-#endif
-
-    /* check that printf() results fit in buffer */
-    if (len == 0 || len >= state->size || next[state->size - 1] != 0)
-        return 0;
-
-    /* update buffer and position, compress first half if past that */
-    strm->avail_in += len;
-    state->x.pos += len;
-    if (strm->avail_in >= state->size) {
-        left = strm->avail_in - state->size;
-        strm->avail_in = state->size;
-        if (gz_comp(state, Z_NO_FLUSH) == -1)
-            return state->err;
-        memcpy(state->in, state->in + state->size, left);
-        strm->next_in = state->in;
-        strm->avail_in = left;
-    }
-    return (int)len;
-}
-
-#endif
-
-/* -- see zlib.h -- */
-int ZEXPORT gzflush(file, flush)
-    gzFile file;
-    int flush;
-{
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return Z_STREAM_ERROR;
-
-    /* check flush parameter */
-    if (flush < 0 || flush > Z_FINISH)
-        return Z_STREAM_ERROR;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return state->err;
-    }
-
-    /* compress remaining data with requested flush */
-    (void)gz_comp(state, flush);
-    return state->err;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzsetparams(file, level, strategy)
-    gzFile file;
-    int level;
-    int strategy;
-{
-    gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-    strm = &(state->strm);
-
-    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
-        return Z_STREAM_ERROR;
-
-    /* if no change is requested, then do nothing */
-    if (level == state->level && strategy == state->strategy)
-        return Z_OK;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            return state->err;
-    }
-
-    /* change compression parameters for subsequent input */
-    if (state->size) {
-        /* flush previous input with previous parameters before changing */
-        if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1)
-            return state->err;
-        deflateParams(strm, level, strategy);
-    }
-    state->level = level;
-    state->strategy = strategy;
-    return Z_OK;
-}
-
-/* -- see zlib.h -- */
-int ZEXPORT gzclose_w(file)
-    gzFile file;
-{
-    int ret = Z_OK;
-    gz_statep state;
-
-    /* get internal structure */
-    if (file == NULL)
-        return Z_STREAM_ERROR;
-    state = (gz_statep)file;
-
-    /* check that we're writing */
-    if (state->mode != GZ_WRITE)
-        return Z_STREAM_ERROR;
-
-    /* check for seek request */
-    if (state->seek) {
-        state->seek = 0;
-        if (gz_zero(state, state->skip) == -1)
-            ret = state->err;
-    }
-
-    /* flush, free memory, and close file */
-    if (gz_comp(state, Z_FINISH) == -1)
-        ret = state->err;
-    if (state->size) {
-        if (!state->direct) {
-            (void)deflateEnd(&(state->strm));
-            free(state->out);
-        }
-        free(state->in);
-    }
-    gz_error(state, Z_OK, NULL);
-    free(state->path);
-    if (close(state->fd) == -1)
-        ret = Z_ERRNO;
-    free(state);
-    return ret;
-}
diff --git a/dep/zlib/src/infback.c b/dep/zlib/src/infback.c
deleted file mode 100644
index 59679ecbf..000000000
--- a/dep/zlib/src/infback.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/* infback.c -- inflate using a call-back interface
- * Copyright (C) 1995-2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
-   This code is largely copied from inflate.c.  Normally either infback.o or
-   inflate.o would be linked into an application--not both.  The interface
-   with inffast.c is retained so that optimized assembler-coded versions of
-   inflate_fast() can be used with either inflate.c or infback.c.
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-
-/*
-   strm provides memory allocation functions in zalloc and zfree, or
-   Z_NULL to use the library memory allocation functions.
-
-   windowBits is in the range 8..15, and window is a user-supplied
-   window and output buffer that is 2**windowBits bytes.
- */
-int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
-z_streamp strm;
-int windowBits;
-unsigned char FAR *window;
-const char *version;
-int stream_size;
-{
-    struct inflate_state FAR *state;
-
-    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
-        stream_size != (int)(sizeof(z_stream)))
-        return Z_VERSION_ERROR;
-    if (strm == Z_NULL || window == Z_NULL ||
-        windowBits < 8 || windowBits > 15)
-        return Z_STREAM_ERROR;
-    strm->msg = Z_NULL;                 /* in case we return an error */
-    if (strm->zalloc == (alloc_func)0) {
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-        strm->zalloc = zcalloc;
-        strm->opaque = (voidpf)0;
-#endif
-    }
-    if (strm->zfree == (free_func)0)
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-    strm->zfree = zcfree;
-#endif
-    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
-                                               sizeof(struct inflate_state));
-    if (state == Z_NULL) return Z_MEM_ERROR;
-    Tracev((stderr, "inflate: allocated\n"));
-    strm->state = (struct internal_state FAR *)state;
-    state->dmax = 32768U;
-    state->wbits = (uInt)windowBits;
-    state->wsize = 1U << windowBits;
-    state->window = window;
-    state->wnext = 0;
-    state->whave = 0;
-    return Z_OK;
-}
-
-/*
-   Return state with length and distance decoding tables and index sizes set to
-   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
-   If BUILDFIXED is defined, then instead this routine builds the tables the
-   first time it's called, and returns those tables the first time and
-   thereafter.  This reduces the size of the code by about 2K bytes, in
-   exchange for a little execution time.  However, BUILDFIXED should not be
-   used for threaded applications, since the rewriting of the tables and virgin
-   may not be thread-safe.
- */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
-#ifdef BUILDFIXED
-    static int virgin = 1;
-    static code *lenfix, *distfix;
-    static code fixed[544];
-
-    /* build fixed huffman tables if first call (may not be thread safe) */
-    if (virgin) {
-        unsigned sym, bits;
-        static code *next;
-
-        /* literal/length table */
-        sym = 0;
-        while (sym < 144) state->lens[sym++] = 8;
-        while (sym < 256) state->lens[sym++] = 9;
-        while (sym < 280) state->lens[sym++] = 7;
-        while (sym < 288) state->lens[sym++] = 8;
-        next = fixed;
-        lenfix = next;
-        bits = 9;
-        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
-
-        /* distance table */
-        sym = 0;
-        while (sym < 32) state->lens[sym++] = 5;
-        distfix = next;
-        bits = 5;
-        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
-
-        /* do this just once */
-        virgin = 0;
-    }
-#else /* !BUILDFIXED */
-#   include "inffixed.h"
-#endif /* BUILDFIXED */
-    state->lencode = lenfix;
-    state->lenbits = 9;
-    state->distcode = distfix;
-    state->distbits = 5;
-}
-
-/* Macros for inflateBack(): */
-
-/* Load returned state from inflate_fast() */
-#define LOAD() \
-    do { \
-        put = strm->next_out; \
-        left = strm->avail_out; \
-        next = strm->next_in; \
-        have = strm->avail_in; \
-        hold = state->hold; \
-        bits = state->bits; \
-    } while (0)
-
-/* Set state from registers for inflate_fast() */
-#define RESTORE() \
-    do { \
-        strm->next_out = put; \
-        strm->avail_out = left; \
-        strm->next_in = next; \
-        strm->avail_in = have; \
-        state->hold = hold; \
-        state->bits = bits; \
-    } while (0)
-
-/* Clear the input bit accumulator */
-#define INITBITS() \
-    do { \
-        hold = 0; \
-        bits = 0; \
-    } while (0)
-
-/* Assure that some input is available.  If input is requested, but denied,
-   then return a Z_BUF_ERROR from inflateBack(). */
-#define PULL() \
-    do { \
-        if (have == 0) { \
-            have = in(in_desc, &next); \
-            if (have == 0) { \
-                next = Z_NULL; \
-                ret = Z_BUF_ERROR; \
-                goto inf_leave; \
-            } \
-        } \
-    } while (0)
-
-/* Get a byte of input into the bit accumulator, or return from inflateBack()
-   with an error if there is no input available. */
-#define PULLBYTE() \
-    do { \
-        PULL(); \
-        have--; \
-        hold += (unsigned long)(*next++) << bits; \
-        bits += 8; \
-    } while (0)
-
-/* Assure that there are at least n bits in the bit accumulator.  If there is
-   not enough available input to do that, then return from inflateBack() with
-   an error. */
-#define NEEDBITS(n) \
-    do { \
-        while (bits < (unsigned)(n)) \
-            PULLBYTE(); \
-    } while (0)
-
-/* Return the low n bits of the bit accumulator (n < 16) */
-#define BITS(n) \
-    ((unsigned)hold & ((1U << (n)) - 1))
-
-/* Remove n bits from the bit accumulator */
-#define DROPBITS(n) \
-    do { \
-        hold >>= (n); \
-        bits -= (unsigned)(n); \
-    } while (0)
-
-/* Remove zero to seven bits as needed to go to a byte boundary */
-#define BYTEBITS() \
-    do { \
-        hold >>= bits & 7; \
-        bits -= bits & 7; \
-    } while (0)
-
-/* Assure that some output space is available, by writing out the window
-   if it's full.  If the write fails, return from inflateBack() with a
-   Z_BUF_ERROR. */
-#define ROOM() \
-    do { \
-        if (left == 0) { \
-            put = state->window; \
-            left = state->wsize; \
-            state->whave = left; \
-            if (out(out_desc, put, left)) { \
-                ret = Z_BUF_ERROR; \
-                goto inf_leave; \
-            } \
-        } \
-    } while (0)
-
-/*
-   strm provides the memory allocation functions and window buffer on input,
-   and provides information on the unused input on return.  For Z_DATA_ERROR
-   returns, strm will also provide an error message.
-
-   in() and out() are the call-back input and output functions.  When
-   inflateBack() needs more input, it calls in().  When inflateBack() has
-   filled the window with output, or when it completes with data in the
-   window, it calls out() to write out the data.  The application must not
-   change the provided input until in() is called again or inflateBack()
-   returns.  The application must not change the window/output buffer until
-   inflateBack() returns.
-
-   in() and out() are called with a descriptor parameter provided in the
-   inflateBack() call.  This parameter can be a structure that provides the
-   information required to do the read or write, as well as accumulated
-   information on the input and output such as totals and check values.
-
-   in() should return zero on failure.  out() should return non-zero on
-   failure.  If either in() or out() fails, than inflateBack() returns a
-   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
-   was in() or out() that caused in the error.  Otherwise,  inflateBack()
-   returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format
-   error, or Z_MEM_ERROR if it could not allocate memory for the state.
-   inflateBack() can also return Z_STREAM_ERROR if the input parameters
-   are not correct, i.e. strm is Z_NULL or the state was not initialized.
- */
-int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
-z_streamp strm;
-in_func in;
-void FAR *in_desc;
-out_func out;
-void FAR *out_desc;
-{
-    struct inflate_state FAR *state;
-    z_const unsigned char FAR *next;    /* next input */
-    unsigned char FAR *put;     /* next output */
-    unsigned have, left;        /* available input and output */
-    unsigned long hold;         /* bit buffer */
-    unsigned bits;              /* bits in bit buffer */
-    unsigned copy;              /* number of stored or match bytes to copy */
-    unsigned char FAR *from;    /* where to copy match bytes from */
-    code here;                  /* current decoding table entry */
-    code last;                  /* parent table entry */
-    unsigned len;               /* length to copy for repeats, bits to drop */
-    int ret;                    /* return code */
-    static const unsigned short order[19] = /* permutation of code lengths */
-        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-    /* Check that the strm exists and that the state was initialized */
-    if (strm == Z_NULL || strm->state == Z_NULL)
-        return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-
-    /* Reset the state */
-    strm->msg = Z_NULL;
-    state->mode = TYPE;
-    state->last = 0;
-    state->whave = 0;
-    next = strm->next_in;
-    have = next != Z_NULL ? strm->avail_in : 0;
-    hold = 0;
-    bits = 0;
-    put = state->window;
-    left = state->wsize;
-
-    /* Inflate until end of block marked as last */
-    for (;;)
-        switch (state->mode) {
-        case TYPE:
-            /* determine and dispatch block type */
-            if (state->last) {
-                BYTEBITS();
-                state->mode = DONE;
-                break;
-            }
-            NEEDBITS(3);
-            state->last = BITS(1);
-            DROPBITS(1);
-            switch (BITS(2)) {
-            case 0:                             /* stored block */
-                Tracev((stderr, "inflate:     stored block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = STORED;
-                break;
-            case 1:                             /* fixed block */
-                fixedtables(state);
-                Tracev((stderr, "inflate:     fixed codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = LEN;              /* decode codes */
-                break;
-            case 2:                             /* dynamic block */
-                Tracev((stderr, "inflate:     dynamic codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = TABLE;
-                break;
-            case 3:
-                strm->msg = (char *)"invalid block type";
-                state->mode = BAD;
-            }
-            DROPBITS(2);
-            break;
-
-        case STORED:
-            /* get and verify stored block length */
-            BYTEBITS();                         /* go to byte boundary */
-            NEEDBITS(32);
-            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
-                strm->msg = (char *)"invalid stored block lengths";
-                state->mode = BAD;
-                break;
-            }
-            state->length = (unsigned)hold & 0xffff;
-            Tracev((stderr, "inflate:       stored length %u\n",
-                    state->length));
-            INITBITS();
-
-            /* copy stored block from input to output */
-            while (state->length != 0) {
-                copy = state->length;
-                PULL();
-                ROOM();
-                if (copy > have) copy = have;
-                if (copy > left) copy = left;
-                zmemcpy(put, next, copy);
-                have -= copy;
-                next += copy;
-                left -= copy;
-                put += copy;
-                state->length -= copy;
-            }
-            Tracev((stderr, "inflate:       stored end\n"));
-            state->mode = TYPE;
-            break;
-
-        case TABLE:
-            /* get dynamic table entries descriptor */
-            NEEDBITS(14);
-            state->nlen = BITS(5) + 257;
-            DROPBITS(5);
-            state->ndist = BITS(5) + 1;
-            DROPBITS(5);
-            state->ncode = BITS(4) + 4;
-            DROPBITS(4);
-#ifndef PKZIP_BUG_WORKAROUND
-            if (state->nlen > 286 || state->ndist > 30) {
-                strm->msg = (char *)"too many length or distance symbols";
-                state->mode = BAD;
-                break;
-            }
-#endif
-            Tracev((stderr, "inflate:       table sizes ok\n"));
-
-            /* get code length code lengths (not a typo) */
-            state->have = 0;
-            while (state->have < state->ncode) {
-                NEEDBITS(3);
-                state->lens[order[state->have++]] = (unsigned short)BITS(3);
-                DROPBITS(3);
-            }
-            while (state->have < 19)
-                state->lens[order[state->have++]] = 0;
-            state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
-            state->lenbits = 7;
-            ret = inflate_table(CODES, state->lens, 19, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid code lengths set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       code lengths ok\n"));
-
-            /* get length and distance code code lengths */
-            state->have = 0;
-            while (state->have < state->nlen + state->ndist) {
-                for (;;) {
-                    here = state->lencode[BITS(state->lenbits)];
-                    if ((unsigned)(here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                if (here.val < 16) {
-                    DROPBITS(here.bits);
-                    state->lens[state->have++] = here.val;
-                }
-                else {
-                    if (here.val == 16) {
-                        NEEDBITS(here.bits + 2);
-                        DROPBITS(here.bits);
-                        if (state->have == 0) {
-                            strm->msg = (char *)"invalid bit length repeat";
-                            state->mode = BAD;
-                            break;
-                        }
-                        len = (unsigned)(state->lens[state->have - 1]);
-                        copy = 3 + BITS(2);
-                        DROPBITS(2);
-                    }
-                    else if (here.val == 17) {
-                        NEEDBITS(here.bits + 3);
-                        DROPBITS(here.bits);
-                        len = 0;
-                        copy = 3 + BITS(3);
-                        DROPBITS(3);
-                    }
-                    else {
-                        NEEDBITS(here.bits + 7);
-                        DROPBITS(here.bits);
-                        len = 0;
-                        copy = 11 + BITS(7);
-                        DROPBITS(7);
-                    }
-                    if (state->have + copy > state->nlen + state->ndist) {
-                        strm->msg = (char *)"invalid bit length repeat";
-                        state->mode = BAD;
-                        break;
-                    }
-                    while (copy--)
-                        state->lens[state->have++] = (unsigned short)len;
-                }
-            }
-
-            /* handle error breaks in while */
-            if (state->mode == BAD) break;
-
-            /* check for end-of-block code (better have one) */
-            if (state->lens[256] == 0) {
-                strm->msg = (char *)"invalid code -- missing end-of-block";
-                state->mode = BAD;
-                break;
-            }
-
-            /* build code tables -- note: do not change the lenbits or distbits
-               values here (9 and 6) without reading the comments in inftrees.h
-               concerning the ENOUGH constants, which depend on those values */
-            state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
-            state->lenbits = 9;
-            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid literal/lengths set";
-                state->mode = BAD;
-                break;
-            }
-            state->distcode = (code const FAR *)(state->next);
-            state->distbits = 6;
-            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
-                            &(state->next), &(state->distbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid distances set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       codes ok\n"));
-            state->mode = LEN;
-
-        case LEN:
-            /* use inflate_fast() if we have enough input and output */
-            if (have >= 6 && left >= 258) {
-                RESTORE();
-                if (state->whave < state->wsize)
-                    state->whave = state->wsize - left;
-                inflate_fast(strm, state->wsize);
-                LOAD();
-                break;
-            }
-
-            /* get a literal, length, or end-of-block code */
-            for (;;) {
-                here = state->lencode[BITS(state->lenbits)];
-                if ((unsigned)(here.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if (here.op && (here.op & 0xf0) == 0) {
-                last = here;
-                for (;;) {
-                    here = state->lencode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-            }
-            DROPBITS(here.bits);
-            state->length = (unsigned)here.val;
-
-            /* process literal */
-            if (here.op == 0) {
-                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
-                        "inflate:         literal '%c'\n" :
-                        "inflate:         literal 0x%02x\n", here.val));
-                ROOM();
-                *put++ = (unsigned char)(state->length);
-                left--;
-                state->mode = LEN;
-                break;
-            }
-
-            /* process end of block */
-            if (here.op & 32) {
-                Tracevv((stderr, "inflate:         end of block\n"));
-                state->mode = TYPE;
-                break;
-            }
-
-            /* invalid code */
-            if (here.op & 64) {
-                strm->msg = (char *)"invalid literal/length code";
-                state->mode = BAD;
-                break;
-            }
-
-            /* length code -- get extra bits, if any */
-            state->extra = (unsigned)(here.op) & 15;
-            if (state->extra != 0) {
-                NEEDBITS(state->extra);
-                state->length += BITS(state->extra);
-                DROPBITS(state->extra);
-            }
-            Tracevv((stderr, "inflate:         length %u\n", state->length));
-
-            /* get distance code */
-            for (;;) {
-                here = state->distcode[BITS(state->distbits)];
-                if ((unsigned)(here.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if ((here.op & 0xf0) == 0) {
-                last = here;
-                for (;;) {
-                    here = state->distcode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-            }
-            DROPBITS(here.bits);
-            if (here.op & 64) {
-                strm->msg = (char *)"invalid distance code";
-                state->mode = BAD;
-                break;
-            }
-            state->offset = (unsigned)here.val;
-
-            /* get distance extra bits, if any */
-            state->extra = (unsigned)(here.op) & 15;
-            if (state->extra != 0) {
-                NEEDBITS(state->extra);
-                state->offset += BITS(state->extra);
-                DROPBITS(state->extra);
-            }
-            if (state->offset > state->wsize - (state->whave < state->wsize ?
-                                                left : 0)) {
-                strm->msg = (char *)"invalid distance too far back";
-                state->mode = BAD;
-                break;
-            }
-            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
-
-            /* copy match from window to output */
-            do {
-                ROOM();
-                copy = state->wsize - state->offset;
-                if (copy < left) {
-                    from = put + copy;
-                    copy = left - copy;
-                }
-                else {
-                    from = put - state->offset;
-                    copy = left;
-                }
-                if (copy > state->length) copy = state->length;
-                state->length -= copy;
-                left -= copy;
-                do {
-                    *put++ = *from++;
-                } while (--copy);
-            } while (state->length != 0);
-            break;
-
-        case DONE:
-            /* inflate stream terminated properly -- write leftover output */
-            ret = Z_STREAM_END;
-            if (left < state->wsize) {
-                if (out(out_desc, state->window, state->wsize - left))
-                    ret = Z_BUF_ERROR;
-            }
-            goto inf_leave;
-
-        case BAD:
-            ret = Z_DATA_ERROR;
-            goto inf_leave;
-
-        default:                /* can't happen, but makes compilers happy */
-            ret = Z_STREAM_ERROR;
-            goto inf_leave;
-        }
-
-    /* Return unused input */
-  inf_leave:
-    strm->next_in = next;
-    strm->avail_in = have;
-    return ret;
-}
-
-int ZEXPORT inflateBackEnd(strm)
-z_streamp strm;
-{
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
-        return Z_STREAM_ERROR;
-    ZFREE(strm, strm->state);
-    strm->state = Z_NULL;
-    Tracev((stderr, "inflate: end\n"));
-    return Z_OK;
-}
diff --git a/dep/zlib/src/inffast.c b/dep/zlib/src/inffast.c
deleted file mode 100644
index 0dbd1dbc0..000000000
--- a/dep/zlib/src/inffast.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/* inffast.c -- fast decoding
- * Copyright (C) 1995-2017 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-#ifdef ASMINF
-#  pragma message("Assembler code may have bugs -- use at your own risk")
-#else
-
-/*
-   Decode literal, length, and distance codes and write out the resulting
-   literal and match bytes until either not enough input or output is
-   available, an end-of-block is encountered, or a data error is encountered.
-   When large enough input and output buffers are supplied to inflate(), for
-   example, a 16K input buffer and a 64K output buffer, more than 95% of the
-   inflate execution time is spent in this routine.
-
-   Entry assumptions:
-
-        state->mode == LEN
-        strm->avail_in >= 6
-        strm->avail_out >= 258
-        start >= strm->avail_out
-        state->bits < 8
-
-   On return, state->mode is one of:
-
-        LEN -- ran out of enough output space or enough available input
-        TYPE -- reached end of block code, inflate() to interpret next block
-        BAD -- error in block data
-
-   Notes:
-
-    - The maximum input bits used by a length/distance pair is 15 bits for the
-      length code, 5 bits for the length extra, 15 bits for the distance code,
-      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
-      Therefore if strm->avail_in >= 6, then there is enough input to avoid
-      checking for available input while decoding.
-
-    - The maximum bytes that a single length/distance pair can output is 258
-      bytes, which is the maximum length that can be coded.  inflate_fast()
-      requires strm->avail_out >= 258 for each loop to avoid checking for
-      output space.
- */
-void ZLIB_INTERNAL inflate_fast(strm, start)
-z_streamp strm;
-unsigned start;         /* inflate()'s starting value for strm->avail_out */
-{
-    struct inflate_state FAR *state;
-    z_const unsigned char FAR *in;      /* local strm->next_in */
-    z_const unsigned char FAR *last;    /* have enough input while in < last */
-    unsigned char FAR *out;     /* local strm->next_out */
-    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
-    unsigned char FAR *end;     /* while out < end, enough space available */
-#ifdef INFLATE_STRICT
-    unsigned dmax;              /* maximum distance from zlib header */
-#endif
-    unsigned wsize;             /* window size or zero if not using window */
-    unsigned whave;             /* valid bytes in the window */
-    unsigned wnext;             /* window write index */
-    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
-    unsigned long hold;         /* local strm->hold */
-    unsigned bits;              /* local strm->bits */
-    code const FAR *lcode;      /* local strm->lencode */
-    code const FAR *dcode;      /* local strm->distcode */
-    unsigned lmask;             /* mask for first level of length codes */
-    unsigned dmask;             /* mask for first level of distance codes */
-    code here;                  /* retrieved table entry */
-    unsigned op;                /* code bits, operation, extra bits, or */
-                                /*  window position, window bytes to copy */
-    unsigned len;               /* match length, unused bytes */
-    unsigned dist;              /* match distance */
-    unsigned char FAR *from;    /* where to copy match from */
-
-    /* copy state to local variables */
-    state = (struct inflate_state FAR *)strm->state;
-    in = strm->next_in;
-    last = in + (strm->avail_in - 5);
-    out = strm->next_out;
-    beg = out - (start - strm->avail_out);
-    end = out + (strm->avail_out - 257);
-#ifdef INFLATE_STRICT
-    dmax = state->dmax;
-#endif
-    wsize = state->wsize;
-    whave = state->whave;
-    wnext = state->wnext;
-    window = state->window;
-    hold = state->hold;
-    bits = state->bits;
-    lcode = state->lencode;
-    dcode = state->distcode;
-    lmask = (1U << state->lenbits) - 1;
-    dmask = (1U << state->distbits) - 1;
-
-    /* decode literals and length/distances until end-of-block or not enough
-       input data or output space */
-    do {
-        if (bits < 15) {
-            hold += (unsigned long)(*in++) << bits;
-            bits += 8;
-            hold += (unsigned long)(*in++) << bits;
-            bits += 8;
-        }
-        here = lcode[hold & lmask];
-      dolen:
-        op = (unsigned)(here.bits);
-        hold >>= op;
-        bits -= op;
-        op = (unsigned)(here.op);
-        if (op == 0) {                          /* literal */
-            Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
-                    "inflate:         literal '%c'\n" :
-                    "inflate:         literal 0x%02x\n", here.val));
-            *out++ = (unsigned char)(here.val);
-        }
-        else if (op & 16) {                     /* length base */
-            len = (unsigned)(here.val);
-            op &= 15;                           /* number of extra bits */
-            if (op) {
-                if (bits < op) {
-                    hold += (unsigned long)(*in++) << bits;
-                    bits += 8;
-                }
-                len += (unsigned)hold & ((1U << op) - 1);
-                hold >>= op;
-                bits -= op;
-            }
-            Tracevv((stderr, "inflate:         length %u\n", len));
-            if (bits < 15) {
-                hold += (unsigned long)(*in++) << bits;
-                bits += 8;
-                hold += (unsigned long)(*in++) << bits;
-                bits += 8;
-            }
-            here = dcode[hold & dmask];
-          dodist:
-            op = (unsigned)(here.bits);
-            hold >>= op;
-            bits -= op;
-            op = (unsigned)(here.op);
-            if (op & 16) {                      /* distance base */
-                dist = (unsigned)(here.val);
-                op &= 15;                       /* number of extra bits */
-                if (bits < op) {
-                    hold += (unsigned long)(*in++) << bits;
-                    bits += 8;
-                    if (bits < op) {
-                        hold += (unsigned long)(*in++) << bits;
-                        bits += 8;
-                    }
-                }
-                dist += (unsigned)hold & ((1U << op) - 1);
-#ifdef INFLATE_STRICT
-                if (dist > dmax) {
-                    strm->msg = (char *)"invalid distance too far back";
-                    state->mode = BAD;
-                    break;
-                }
-#endif
-                hold >>= op;
-                bits -= op;
-                Tracevv((stderr, "inflate:         distance %u\n", dist));
-                op = (unsigned)(out - beg);     /* max distance in output */
-                if (dist > op) {                /* see if copy from window */
-                    op = dist - op;             /* distance back in window */
-                    if (op > whave) {
-                        if (state->sane) {
-                            strm->msg =
-                                (char *)"invalid distance too far back";
-                            state->mode = BAD;
-                            break;
-                        }
-#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
-                        if (len <= op - whave) {
-                            do {
-                                *out++ = 0;
-                            } while (--len);
-                            continue;
-                        }
-                        len -= op - whave;
-                        do {
-                            *out++ = 0;
-                        } while (--op > whave);
-                        if (op == 0) {
-                            from = out - dist;
-                            do {
-                                *out++ = *from++;
-                            } while (--len);
-                            continue;
-                        }
-#endif
-                    }
-                    from = window;
-                    if (wnext == 0) {           /* very common case */
-                        from += wsize - op;
-                        if (op < len) {         /* some from window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = out - dist;  /* rest from output */
-                        }
-                    }
-                    else if (wnext < op) {      /* wrap around window */
-                        from += wsize + wnext - op;
-                        op -= wnext;
-                        if (op < len) {         /* some from end of window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = window;
-                            if (wnext < len) {  /* some from start of window */
-                                op = wnext;
-                                len -= op;
-                                do {
-                                    *out++ = *from++;
-                                } while (--op);
-                                from = out - dist;      /* rest from output */
-                            }
-                        }
-                    }
-                    else {                      /* contiguous in window */
-                        from += wnext - op;
-                        if (op < len) {         /* some from window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = out - dist;  /* rest from output */
-                        }
-                    }
-                    while (len > 2) {
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        len -= 3;
-                    }
-                    if (len) {
-                        *out++ = *from++;
-                        if (len > 1)
-                            *out++ = *from++;
-                    }
-                }
-                else {
-                    from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                        *out++ = *from++;
-                        if (len > 1)
-                            *out++ = *from++;
-                    }
-                }
-            }
-            else if ((op & 64) == 0) {          /* 2nd level distance code */
-                here = dcode[here.val + (hold & ((1U << op) - 1))];
-                goto dodist;
-            }
-            else {
-                strm->msg = (char *)"invalid distance code";
-                state->mode = BAD;
-                break;
-            }
-        }
-        else if ((op & 64) == 0) {              /* 2nd level length code */
-            here = lcode[here.val + (hold & ((1U << op) - 1))];
-            goto dolen;
-        }
-        else if (op & 32) {                     /* end-of-block */
-            Tracevv((stderr, "inflate:         end of block\n"));
-            state->mode = TYPE;
-            break;
-        }
-        else {
-            strm->msg = (char *)"invalid literal/length code";
-            state->mode = BAD;
-            break;
-        }
-    } while (in < last && out < end);
-
-    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
-    len = bits >> 3;
-    in -= len;
-    bits -= len << 3;
-    hold &= (1U << bits) - 1;
-
-    /* update state and return */
-    strm->next_in = in;
-    strm->next_out = out;
-    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
-    strm->avail_out = (unsigned)(out < end ?
-                                 257 + (end - out) : 257 - (out - end));
-    state->hold = hold;
-    state->bits = bits;
-    return;
-}
-
-/*
-   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
-   - Using bit fields for code structure
-   - Different op definition to avoid & for extra bits (do & for table bits)
-   - Three separate decoding do-loops for direct, window, and wnext == 0
-   - Special case for distance > 1 copies to do overlapped load and store copy
-   - Explicit branch predictions (based on measured branch probabilities)
-   - Deferring match copy and interspersed it with decoding subsequent codes
-   - Swapping literal/length else
-   - Swapping window/direct else
-   - Larger unrolled copy loops (three is about right)
-   - Moving len -= 3 statement into middle of loop
- */
-
-#endif /* !ASMINF */
diff --git a/dep/zlib/src/inffast.h b/dep/zlib/src/inffast.h
deleted file mode 100644
index e5c1aa4ca..000000000
--- a/dep/zlib/src/inffast.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* inffast.h -- header to use inffast.c
- * Copyright (C) 1995-2003, 2010 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
diff --git a/dep/zlib/src/inffixed.h b/dep/zlib/src/inffixed.h
deleted file mode 100644
index d62832776..000000000
--- a/dep/zlib/src/inffixed.h
+++ /dev/null
@@ -1,94 +0,0 @@
-    /* inffixed.h -- table for decoding fixed codes
-     * Generated automatically by makefixed().
-     */
-
-    /* WARNING: this file should *not* be used by applications.
-       It is part of the implementation of this library and is
-       subject to change. Applications should only use zlib.h.
-     */
-
-    static const code lenfix[512] = {
-        {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
-        {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
-        {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
-        {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
-        {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
-        {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
-        {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
-        {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
-        {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
-        {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
-        {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
-        {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
-        {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
-        {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
-        {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
-        {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
-        {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
-        {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
-        {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
-        {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
-        {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
-        {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
-        {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
-        {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
-        {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
-        {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
-        {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
-        {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
-        {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
-        {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
-        {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
-        {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
-        {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
-        {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
-        {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
-        {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
-        {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
-        {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
-        {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
-        {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
-        {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
-        {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
-        {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
-        {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
-        {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
-        {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
-        {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
-        {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
-        {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
-        {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
-        {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
-        {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
-        {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
-        {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
-        {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
-        {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
-        {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
-        {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
-        {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
-        {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
-        {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
-        {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
-        {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
-        {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
-        {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
-        {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
-        {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
-        {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
-        {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
-        {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
-        {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
-        {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
-        {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
-        {0,9,255}
-    };
-
-    static const code distfix[32] = {
-        {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
-        {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
-        {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
-        {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
-        {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
-        {22,5,193},{64,5,0}
-    };
diff --git a/dep/zlib/src/inflate.c b/dep/zlib/src/inflate.c
deleted file mode 100644
index ac333e8c2..000000000
--- a/dep/zlib/src/inflate.c
+++ /dev/null
@@ -1,1561 +0,0 @@
-/* inflate.c -- zlib decompression
- * Copyright (C) 1995-2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- * Change history:
- *
- * 1.2.beta0    24 Nov 2002
- * - First version -- complete rewrite of inflate to simplify code, avoid
- *   creation of window when not needed, minimize use of window when it is
- *   needed, make inffast.c even faster, implement gzip decoding, and to
- *   improve code readability and style over the previous zlib inflate code
- *
- * 1.2.beta1    25 Nov 2002
- * - Use pointers for available input and output checking in inffast.c
- * - Remove input and output counters in inffast.c
- * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
- * - Remove unnecessary second byte pull from length extra in inffast.c
- * - Unroll direct copy to three copies per loop in inffast.c
- *
- * 1.2.beta2    4 Dec 2002
- * - Change external routine names to reduce potential conflicts
- * - Correct filename to inffixed.h for fixed tables in inflate.c
- * - Make hbuf[] unsigned char to match parameter type in inflate.c
- * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
- *   to avoid negation problem on Alphas (64 bit) in inflate.c
- *
- * 1.2.beta3    22 Dec 2002
- * - Add comments on state->bits assertion in inffast.c
- * - Add comments on op field in inftrees.h
- * - Fix bug in reuse of allocated window after inflateReset()
- * - Remove bit fields--back to byte structure for speed
- * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
- * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
- * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
- * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
- * - Use local copies of stream next and avail values, as well as local bit
- *   buffer and bit count in inflate()--for speed when inflate_fast() not used
- *
- * 1.2.beta4    1 Jan 2003
- * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
- * - Move a comment on output buffer sizes from inffast.c to inflate.c
- * - Add comments in inffast.c to introduce the inflate_fast() routine
- * - Rearrange window copies in inflate_fast() for speed and simplification
- * - Unroll last copy for window match in inflate_fast()
- * - Use local copies of window variables in inflate_fast() for speed
- * - Pull out common wnext == 0 case for speed in inflate_fast()
- * - Make op and len in inflate_fast() unsigned for consistency
- * - Add FAR to lcode and dcode declarations in inflate_fast()
- * - Simplified bad distance check in inflate_fast()
- * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
- *   source file infback.c to provide a call-back interface to inflate for
- *   programs like gzip and unzip -- uses window as output buffer to avoid
- *   window copying
- *
- * 1.2.beta5    1 Jan 2003
- * - Improved inflateBack() interface to allow the caller to provide initial
- *   input in strm.
- * - Fixed stored blocks bug in inflateBack()
- *
- * 1.2.beta6    4 Jan 2003
- * - Added comments in inffast.c on effectiveness of POSTINC
- * - Typecasting all around to reduce compiler warnings
- * - Changed loops from while (1) or do {} while (1) to for (;;), again to
- *   make compilers happy
- * - Changed type of window in inflateBackInit() to unsigned char *
- *
- * 1.2.beta7    27 Jan 2003
- * - Changed many types to unsigned or unsigned short to avoid warnings
- * - Added inflateCopy() function
- *
- * 1.2.0        9 Mar 2003
- * - Changed inflateBack() interface to provide separate opaque descriptors
- *   for the in() and out() functions
- * - Changed inflateBack() argument and in_func typedef to swap the length
- *   and buffer address return values for the input function
- * - Check next_in and next_out for Z_NULL on entry to inflate()
- *
- * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-#ifdef MAKEFIXED
-#  ifndef BUILDFIXED
-#    define BUILDFIXED
-#  endif
-#endif
-
-/* function prototypes */
-local int inflateStateCheck OF((z_streamp strm));
-local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
-                           unsigned copy));
-#ifdef BUILDFIXED
-   void makefixed OF((void));
-#endif
-local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
-                              unsigned len));
-
-local int inflateStateCheck(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-    if (strm == Z_NULL ||
-        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
-        return 1;
-    state = (struct inflate_state FAR *)strm->state;
-    if (state == Z_NULL || state->strm != strm ||
-        state->mode < HEAD || state->mode > SYNC)
-        return 1;
-    return 0;
-}
-
-int ZEXPORT inflateResetKeep(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    strm->total_in = strm->total_out = state->total = 0;
-    strm->msg = Z_NULL;
-    if (state->wrap)        /* to support ill-conceived Java test suite */
-        strm->adler = state->wrap & 1;
-    state->mode = HEAD;
-    state->last = 0;
-    state->havedict = 0;
-    state->dmax = 32768U;
-    state->head = Z_NULL;
-    state->hold = 0;
-    state->bits = 0;
-    state->lencode = state->distcode = state->next = state->codes;
-    state->sane = 1;
-    state->back = -1;
-    Tracev((stderr, "inflate: reset\n"));
-    return Z_OK;
-}
-
-int ZEXPORT inflateReset(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    state->wsize = 0;
-    state->whave = 0;
-    state->wnext = 0;
-    return inflateResetKeep(strm);
-}
-
-int ZEXPORT inflateReset2(strm, windowBits)
-z_streamp strm;
-int windowBits;
-{
-    int wrap;
-    struct inflate_state FAR *state;
-
-    /* get the state */
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-
-    /* extract wrap request from windowBits parameter */
-    if (windowBits < 0) {
-        wrap = 0;
-        windowBits = -windowBits;
-    }
-    else {
-        wrap = (windowBits >> 4) + 5;
-#ifdef GUNZIP
-        if (windowBits < 48)
-            windowBits &= 15;
-#endif
-    }
-
-    /* set number of window bits, free window if different */
-    if (windowBits && (windowBits < 8 || windowBits > 15))
-        return Z_STREAM_ERROR;
-    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
-        ZFREE(strm, state->window);
-        state->window = Z_NULL;
-    }
-
-    /* update state and reset the rest of it */
-    state->wrap = wrap;
-    state->wbits = (unsigned)windowBits;
-    return inflateReset(strm);
-}
-
-int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
-z_streamp strm;
-int windowBits;
-const char *version;
-int stream_size;
-{
-    int ret;
-    struct inflate_state FAR *state;
-
-    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
-        stream_size != (int)(sizeof(z_stream)))
-        return Z_VERSION_ERROR;
-    if (strm == Z_NULL) return Z_STREAM_ERROR;
-    strm->msg = Z_NULL;                 /* in case we return an error */
-    if (strm->zalloc == (alloc_func)0) {
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-        strm->zalloc = zcalloc;
-        strm->opaque = (voidpf)0;
-#endif
-    }
-    if (strm->zfree == (free_func)0)
-#ifdef Z_SOLO
-        return Z_STREAM_ERROR;
-#else
-        strm->zfree = zcfree;
-#endif
-    state = (struct inflate_state FAR *)
-            ZALLOC(strm, 1, sizeof(struct inflate_state));
-    if (state == Z_NULL) return Z_MEM_ERROR;
-    Tracev((stderr, "inflate: allocated\n"));
-    strm->state = (struct internal_state FAR *)state;
-    state->strm = strm;
-    state->window = Z_NULL;
-    state->mode = HEAD;     /* to pass state test in inflateReset2() */
-    ret = inflateReset2(strm, windowBits);
-    if (ret != Z_OK) {
-        ZFREE(strm, state);
-        strm->state = Z_NULL;
-    }
-    return ret;
-}
-
-int ZEXPORT inflateInit_(strm, version, stream_size)
-z_streamp strm;
-const char *version;
-int stream_size;
-{
-    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
-}
-
-int ZEXPORT inflatePrime(strm, bits, value)
-z_streamp strm;
-int bits;
-int value;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (bits < 0) {
-        state->hold = 0;
-        state->bits = 0;
-        return Z_OK;
-    }
-    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += (unsigned)value << state->bits;
-    state->bits += (uInt)bits;
-    return Z_OK;
-}
-
-/*
-   Return state with length and distance decoding tables and index sizes set to
-   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
-   If BUILDFIXED is defined, then instead this routine builds the tables the
-   first time it's called, and returns those tables the first time and
-   thereafter.  This reduces the size of the code by about 2K bytes, in
-   exchange for a little execution time.  However, BUILDFIXED should not be
-   used for threaded applications, since the rewriting of the tables and virgin
-   may not be thread-safe.
- */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
-#ifdef BUILDFIXED
-    static int virgin = 1;
-    static code *lenfix, *distfix;
-    static code fixed[544];
-
-    /* build fixed huffman tables if first call (may not be thread safe) */
-    if (virgin) {
-        unsigned sym, bits;
-        static code *next;
-
-        /* literal/length table */
-        sym = 0;
-        while (sym < 144) state->lens[sym++] = 8;
-        while (sym < 256) state->lens[sym++] = 9;
-        while (sym < 280) state->lens[sym++] = 7;
-        while (sym < 288) state->lens[sym++] = 8;
-        next = fixed;
-        lenfix = next;
-        bits = 9;
-        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
-
-        /* distance table */
-        sym = 0;
-        while (sym < 32) state->lens[sym++] = 5;
-        distfix = next;
-        bits = 5;
-        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
-
-        /* do this just once */
-        virgin = 0;
-    }
-#else /* !BUILDFIXED */
-#   include "inffixed.h"
-#endif /* BUILDFIXED */
-    state->lencode = lenfix;
-    state->lenbits = 9;
-    state->distcode = distfix;
-    state->distbits = 5;
-}
-
-#ifdef MAKEFIXED
-#include <stdio.h>
-
-/*
-   Write out the inffixed.h that is #include'd above.  Defining MAKEFIXED also
-   defines BUILDFIXED, so the tables are built on the fly.  makefixed() writes
-   those tables to stdout, which would be piped to inffixed.h.  A small program
-   can simply call makefixed to do this:
-
-    void makefixed(void);
-
-    int main(void)
-    {
-        makefixed();
-        return 0;
-    }
-
-   Then that can be linked with zlib built with MAKEFIXED defined and run:
-
-    a.out > inffixed.h
- */
-void makefixed()
-{
-    unsigned low, size;
-    struct inflate_state state;
-
-    fixedtables(&state);
-    puts("    /* inffixed.h -- table for decoding fixed codes");
-    puts("     * Generated automatically by makefixed().");
-    puts("     */");
-    puts("");
-    puts("    /* WARNING: this file should *not* be used by applications.");
-    puts("       It is part of the implementation of this library and is");
-    puts("       subject to change. Applications should only use zlib.h.");
-    puts("     */");
-    puts("");
-    size = 1U << 9;
-    printf("    static const code lenfix[%u] = {", size);
-    low = 0;
-    for (;;) {
-        if ((low % 7) == 0) printf("\n        ");
-        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
-               state.lencode[low].bits, state.lencode[low].val);
-        if (++low == size) break;
-        putchar(',');
-    }
-    puts("\n    };");
-    size = 1U << 5;
-    printf("\n    static const code distfix[%u] = {", size);
-    low = 0;
-    for (;;) {
-        if ((low % 6) == 0) printf("\n        ");
-        printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
-               state.distcode[low].val);
-        if (++low == size) break;
-        putchar(',');
-    }
-    puts("\n    };");
-}
-#endif /* MAKEFIXED */
-
-/*
-   Update the window with the last wsize (normally 32K) bytes written before
-   returning.  If window does not exist yet, create it.  This is only called
-   when a window is already in use, or when output has been written during this
-   inflate call, but the end of the deflate stream has not been reached yet.
-   It is also called to create a window for dictionary data when a dictionary
-   is loaded.
-
-   Providing output buffers larger than 32K to inflate() should provide a speed
-   advantage, since only the last 32K of output is copied to the sliding window
-   upon return from inflate(), and since all distances after the first 32K of
-   output will fall in the output data, making match copies simpler and faster.
-   The advantage may be dependent on the size of the processor's data caches.
- */
-local int updatewindow(strm, end, copy)
-z_streamp strm;
-const Bytef *end;
-unsigned copy;
-{
-    struct inflate_state FAR *state;
-    unsigned dist;
-
-    state = (struct inflate_state FAR *)strm->state;
-
-    /* if it hasn't been done already, allocate space for the window */
-    if (state->window == Z_NULL) {
-        state->window = (unsigned char FAR *)
-                        ZALLOC(strm, 1U << state->wbits,
-                               sizeof(unsigned char));
-        if (state->window == Z_NULL) return 1;
-    }
-
-    /* if window not in use yet, initialize */
-    if (state->wsize == 0) {
-        state->wsize = 1U << state->wbits;
-        state->wnext = 0;
-        state->whave = 0;
-    }
-
-    /* copy state->wsize or less output bytes into the circular window */
-    if (copy >= state->wsize) {
-        zmemcpy(state->window, end - state->wsize, state->wsize);
-        state->wnext = 0;
-        state->whave = state->wsize;
-    }
-    else {
-        dist = state->wsize - state->wnext;
-        if (dist > copy) dist = copy;
-        zmemcpy(state->window + state->wnext, end - copy, dist);
-        copy -= dist;
-        if (copy) {
-            zmemcpy(state->window, end - copy, copy);
-            state->wnext = copy;
-            state->whave = state->wsize;
-        }
-        else {
-            state->wnext += dist;
-            if (state->wnext == state->wsize) state->wnext = 0;
-            if (state->whave < state->wsize) state->whave += dist;
-        }
-    }
-    return 0;
-}
-
-/* Macros for inflate(): */
-
-/* check function to use adler32() for zlib or crc32() for gzip */
-#ifdef GUNZIP
-#  define UPDATE(check, buf, len) \
-    (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
-#else
-#  define UPDATE(check, buf, len) adler32(check, buf, len)
-#endif
-
-/* check macros for header crc */
-#ifdef GUNZIP
-#  define CRC2(check, word) \
-    do { \
-        hbuf[0] = (unsigned char)(word); \
-        hbuf[1] = (unsigned char)((word) >> 8); \
-        check = crc32(check, hbuf, 2); \
-    } while (0)
-
-#  define CRC4(check, word) \
-    do { \
-        hbuf[0] = (unsigned char)(word); \
-        hbuf[1] = (unsigned char)((word) >> 8); \
-        hbuf[2] = (unsigned char)((word) >> 16); \
-        hbuf[3] = (unsigned char)((word) >> 24); \
-        check = crc32(check, hbuf, 4); \
-    } while (0)
-#endif
-
-/* Load registers with state in inflate() for speed */
-#define LOAD() \
-    do { \
-        put = strm->next_out; \
-        left = strm->avail_out; \
-        next = strm->next_in; \
-        have = strm->avail_in; \
-        hold = state->hold; \
-        bits = state->bits; \
-    } while (0)
-
-/* Restore state from registers in inflate() */
-#define RESTORE() \
-    do { \
-        strm->next_out = put; \
-        strm->avail_out = left; \
-        strm->next_in = next; \
-        strm->avail_in = have; \
-        state->hold = hold; \
-        state->bits = bits; \
-    } while (0)
-
-/* Clear the input bit accumulator */
-#define INITBITS() \
-    do { \
-        hold = 0; \
-        bits = 0; \
-    } while (0)
-
-/* Get a byte of input into the bit accumulator, or return from inflate()
-   if there is no input available. */
-#define PULLBYTE() \
-    do { \
-        if (have == 0) goto inf_leave; \
-        have--; \
-        hold += (unsigned long)(*next++) << bits; \
-        bits += 8; \
-    } while (0)
-
-/* Assure that there are at least n bits in the bit accumulator.  If there is
-   not enough available input to do that, then return from inflate(). */
-#define NEEDBITS(n) \
-    do { \
-        while (bits < (unsigned)(n)) \
-            PULLBYTE(); \
-    } while (0)
-
-/* Return the low n bits of the bit accumulator (n < 16) */
-#define BITS(n) \
-    ((unsigned)hold & ((1U << (n)) - 1))
-
-/* Remove n bits from the bit accumulator */
-#define DROPBITS(n) \
-    do { \
-        hold >>= (n); \
-        bits -= (unsigned)(n); \
-    } while (0)
-
-/* Remove zero to seven bits as needed to go to a byte boundary */
-#define BYTEBITS() \
-    do { \
-        hold >>= bits & 7; \
-        bits -= bits & 7; \
-    } while (0)
-
-/*
-   inflate() uses a state machine to process as much input data and generate as
-   much output data as possible before returning.  The state machine is
-   structured roughly as follows:
-
-    for (;;) switch (state) {
-    ...
-    case STATEn:
-        if (not enough input data or output space to make progress)
-            return;
-        ... make progress ...
-        state = STATEm;
-        break;
-    ...
-    }
-
-   so when inflate() is called again, the same case is attempted again, and
-   if the appropriate resources are provided, the machine proceeds to the
-   next state.  The NEEDBITS() macro is usually the way the state evaluates
-   whether it can proceed or should return.  NEEDBITS() does the return if
-   the requested bits are not available.  The typical use of the BITS macros
-   is:
-
-        NEEDBITS(n);
-        ... do something with BITS(n) ...
-        DROPBITS(n);
-
-   where NEEDBITS(n) either returns from inflate() if there isn't enough
-   input left to load n bits into the accumulator, or it continues.  BITS(n)
-   gives the low n bits in the accumulator.  When done, DROPBITS(n) drops
-   the low n bits off the accumulator.  INITBITS() clears the accumulator
-   and sets the number of available bits to zero.  BYTEBITS() discards just
-   enough bits to put the accumulator on a byte boundary.  After BYTEBITS()
-   and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
-
-   NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
-   if there is no input available.  The decoding of variable length codes uses
-   PULLBYTE() directly in order to pull just enough bytes to decode the next
-   code, and no more.
-
-   Some states loop until they get enough input, making sure that enough
-   state information is maintained to continue the loop where it left off
-   if NEEDBITS() returns in the loop.  For example, want, need, and keep
-   would all have to actually be part of the saved state in case NEEDBITS()
-   returns:
-
-    case STATEw:
-        while (want < need) {
-            NEEDBITS(n);
-            keep[want++] = BITS(n);
-            DROPBITS(n);
-        }
-        state = STATEx;
-    case STATEx:
-
-   As shown above, if the next state is also the next case, then the break
-   is omitted.
-
-   A state may also return if there is not enough output space available to
-   complete that state.  Those states are copying stored data, writing a
-   literal byte, and copying a matching string.
-
-   When returning, a "goto inf_leave" is used to update the total counters,
-   update the check value, and determine whether any progress has been made
-   during that inflate() call in order to return the proper return code.
-   Progress is defined as a change in either strm->avail_in or strm->avail_out.
-   When there is a window, goto inf_leave will update the window with the last
-   output written.  If a goto inf_leave occurs in the middle of decompression
-   and there is no window currently, goto inf_leave will create one and copy
-   output to the window for the next call of inflate().
-
-   In this implementation, the flush parameter of inflate() only affects the
-   return code (per zlib.h).  inflate() always writes as much as possible to
-   strm->next_out, given the space available and the provided input--the effect
-   documented in zlib.h of Z_SYNC_FLUSH.  Furthermore, inflate() always defers
-   the allocation of and copying into a sliding window until necessary, which
-   provides the effect documented in zlib.h for Z_FINISH when the entire input
-   stream available.  So the only thing the flush parameter actually does is:
-   when flush is set to Z_FINISH, inflate() cannot return Z_OK.  Instead it
-   will return Z_BUF_ERROR if it has not reached the end of the stream.
- */
-
-int ZEXPORT inflate(strm, flush)
-z_streamp strm;
-int flush;
-{
-    struct inflate_state FAR *state;
-    z_const unsigned char FAR *next;    /* next input */
-    unsigned char FAR *put;     /* next output */
-    unsigned have, left;        /* available input and output */
-    unsigned long hold;         /* bit buffer */
-    unsigned bits;              /* bits in bit buffer */
-    unsigned in, out;           /* save starting available input and output */
-    unsigned copy;              /* number of stored or match bytes to copy */
-    unsigned char FAR *from;    /* where to copy match bytes from */
-    code here;                  /* current decoding table entry */
-    code last;                  /* parent table entry */
-    unsigned len;               /* length to copy for repeats, bits to drop */
-    int ret;                    /* return code */
-#ifdef GUNZIP
-    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
-#endif
-    static const unsigned short order[19] = /* permutation of code lengths */
-        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
-
-    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
-        (strm->next_in == Z_NULL && strm->avail_in != 0))
-        return Z_STREAM_ERROR;
-
-    state = (struct inflate_state FAR *)strm->state;
-    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
-    LOAD();
-    in = have;
-    out = left;
-    ret = Z_OK;
-    for (;;)
-        switch (state->mode) {
-        case HEAD:
-            if (state->wrap == 0) {
-                state->mode = TYPEDO;
-                break;
-            }
-            NEEDBITS(16);
-#ifdef GUNZIP
-            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
-                if (state->wbits == 0)
-                    state->wbits = 15;
-                state->check = crc32(0L, Z_NULL, 0);
-                CRC2(state->check, hold);
-                INITBITS();
-                state->mode = FLAGS;
-                break;
-            }
-            state->flags = 0;           /* expect zlib header */
-            if (state->head != Z_NULL)
-                state->head->done = -1;
-            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
-#else
-            if (
-#endif
-                ((BITS(8) << 8) + (hold >> 8)) % 31) {
-                strm->msg = (char *)"incorrect header check";
-                state->mode = BAD;
-                break;
-            }
-            if (BITS(4) != Z_DEFLATED) {
-                strm->msg = (char *)"unknown compression method";
-                state->mode = BAD;
-                break;
-            }
-            DROPBITS(4);
-            len = BITS(4) + 8;
-            if (state->wbits == 0)
-                state->wbits = len;
-            if (len > 15 || len > state->wbits) {
-                strm->msg = (char *)"invalid window size";
-                state->mode = BAD;
-                break;
-            }
-            state->dmax = 1U << len;
-            Tracev((stderr, "inflate:   zlib header ok\n"));
-            strm->adler = state->check = adler32(0L, Z_NULL, 0);
-            state->mode = hold & 0x200 ? DICTID : TYPE;
-            INITBITS();
-            break;
-#ifdef GUNZIP
-        case FLAGS:
-            NEEDBITS(16);
-            state->flags = (int)(hold);
-            if ((state->flags & 0xff) != Z_DEFLATED) {
-                strm->msg = (char *)"unknown compression method";
-                state->mode = BAD;
-                break;
-            }
-            if (state->flags & 0xe000) {
-                strm->msg = (char *)"unknown header flags set";
-                state->mode = BAD;
-                break;
-            }
-            if (state->head != Z_NULL)
-                state->head->text = (int)((hold >> 8) & 1);
-            if ((state->flags & 0x0200) && (state->wrap & 4))
-                CRC2(state->check, hold);
-            INITBITS();
-            state->mode = TIME;
-        case TIME:
-            NEEDBITS(32);
-            if (state->head != Z_NULL)
-                state->head->time = hold;
-            if ((state->flags & 0x0200) && (state->wrap & 4))
-                CRC4(state->check, hold);
-            INITBITS();
-            state->mode = OS;
-        case OS:
-            NEEDBITS(16);
-            if (state->head != Z_NULL) {
-                state->head->xflags = (int)(hold & 0xff);
-                state->head->os = (int)(hold >> 8);
-            }
-            if ((state->flags & 0x0200) && (state->wrap & 4))
-                CRC2(state->check, hold);
-            INITBITS();
-            state->mode = EXLEN;
-        case EXLEN:
-            if (state->flags & 0x0400) {
-                NEEDBITS(16);
-                state->length = (unsigned)(hold);
-                if (state->head != Z_NULL)
-                    state->head->extra_len = (unsigned)hold;
-                if ((state->flags & 0x0200) && (state->wrap & 4))
-                    CRC2(state->check, hold);
-                INITBITS();
-            }
-            else if (state->head != Z_NULL)
-                state->head->extra = Z_NULL;
-            state->mode = EXTRA;
-        case EXTRA:
-            if (state->flags & 0x0400) {
-                copy = state->length;
-                if (copy > have) copy = have;
-                if (copy) {
-                    if (state->head != Z_NULL &&
-                        state->head->extra != Z_NULL) {
-                        len = state->head->extra_len - state->length;
-                        zmemcpy(state->head->extra + len, next,
-                                len + copy > state->head->extra_max ?
-                                state->head->extra_max - len : copy);
-                    }
-                    if ((state->flags & 0x0200) && (state->wrap & 4))
-                        state->check = crc32(state->check, next, copy);
-                    have -= copy;
-                    next += copy;
-                    state->length -= copy;
-                }
-                if (state->length) goto inf_leave;
-            }
-            state->length = 0;
-            state->mode = NAME;
-        case NAME:
-            if (state->flags & 0x0800) {
-                if (have == 0) goto inf_leave;
-                copy = 0;
-                do {
-                    len = (unsigned)(next[copy++]);
-                    if (state->head != Z_NULL &&
-                            state->head->name != Z_NULL &&
-                            state->length < state->head->name_max)
-                        state->head->name[state->length++] = (Bytef)len;
-                } while (len && copy < have);
-                if ((state->flags & 0x0200) && (state->wrap & 4))
-                    state->check = crc32(state->check, next, copy);
-                have -= copy;
-                next += copy;
-                if (len) goto inf_leave;
-            }
-            else if (state->head != Z_NULL)
-                state->head->name = Z_NULL;
-            state->length = 0;
-            state->mode = COMMENT;
-        case COMMENT:
-            if (state->flags & 0x1000) {
-                if (have == 0) goto inf_leave;
-                copy = 0;
-                do {
-                    len = (unsigned)(next[copy++]);
-                    if (state->head != Z_NULL &&
-                            state->head->comment != Z_NULL &&
-                            state->length < state->head->comm_max)
-                        state->head->comment[state->length++] = (Bytef)len;
-                } while (len && copy < have);
-                if ((state->flags & 0x0200) && (state->wrap & 4))
-                    state->check = crc32(state->check, next, copy);
-                have -= copy;
-                next += copy;
-                if (len) goto inf_leave;
-            }
-            else if (state->head != Z_NULL)
-                state->head->comment = Z_NULL;
-            state->mode = HCRC;
-        case HCRC:
-            if (state->flags & 0x0200) {
-                NEEDBITS(16);
-                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
-                    strm->msg = (char *)"header crc mismatch";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-            }
-            if (state->head != Z_NULL) {
-                state->head->hcrc = (int)((state->flags >> 9) & 1);
-                state->head->done = 1;
-            }
-            strm->adler = state->check = crc32(0L, Z_NULL, 0);
-            state->mode = TYPE;
-            break;
-#endif
-        case DICTID:
-            NEEDBITS(32);
-            strm->adler = state->check = ZSWAP32(hold);
-            INITBITS();
-            state->mode = DICT;
-        case DICT:
-            if (state->havedict == 0) {
-                RESTORE();
-                return Z_NEED_DICT;
-            }
-            strm->adler = state->check = adler32(0L, Z_NULL, 0);
-            state->mode = TYPE;
-        case TYPE:
-            if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
-        case TYPEDO:
-            if (state->last) {
-                BYTEBITS();
-                state->mode = CHECK;
-                break;
-            }
-            NEEDBITS(3);
-            state->last = BITS(1);
-            DROPBITS(1);
-            switch (BITS(2)) {
-            case 0:                             /* stored block */
-                Tracev((stderr, "inflate:     stored block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = STORED;
-                break;
-            case 1:                             /* fixed block */
-                fixedtables(state);
-                Tracev((stderr, "inflate:     fixed codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = LEN_;             /* decode codes */
-                if (flush == Z_TREES) {
-                    DROPBITS(2);
-                    goto inf_leave;
-                }
-                break;
-            case 2:                             /* dynamic block */
-                Tracev((stderr, "inflate:     dynamic codes block%s\n",
-                        state->last ? " (last)" : ""));
-                state->mode = TABLE;
-                break;
-            case 3:
-                strm->msg = (char *)"invalid block type";
-                state->mode = BAD;
-            }
-            DROPBITS(2);
-            break;
-        case STORED:
-            BYTEBITS();                         /* go to byte boundary */
-            NEEDBITS(32);
-            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
-                strm->msg = (char *)"invalid stored block lengths";
-                state->mode = BAD;
-                break;
-            }
-            state->length = (unsigned)hold & 0xffff;
-            Tracev((stderr, "inflate:       stored length %u\n",
-                    state->length));
-            INITBITS();
-            state->mode = COPY_;
-            if (flush == Z_TREES) goto inf_leave;
-        case COPY_:
-            state->mode = COPY;
-        case COPY:
-            copy = state->length;
-            if (copy) {
-                if (copy > have) copy = have;
-                if (copy > left) copy = left;
-                if (copy == 0) goto inf_leave;
-                zmemcpy(put, next, copy);
-                have -= copy;
-                next += copy;
-                left -= copy;
-                put += copy;
-                state->length -= copy;
-                break;
-            }
-            Tracev((stderr, "inflate:       stored end\n"));
-            state->mode = TYPE;
-            break;
-        case TABLE:
-            NEEDBITS(14);
-            state->nlen = BITS(5) + 257;
-            DROPBITS(5);
-            state->ndist = BITS(5) + 1;
-            DROPBITS(5);
-            state->ncode = BITS(4) + 4;
-            DROPBITS(4);
-#ifndef PKZIP_BUG_WORKAROUND
-            if (state->nlen > 286 || state->ndist > 30) {
-                strm->msg = (char *)"too many length or distance symbols";
-                state->mode = BAD;
-                break;
-            }
-#endif
-            Tracev((stderr, "inflate:       table sizes ok\n"));
-            state->have = 0;
-            state->mode = LENLENS;
-        case LENLENS:
-            while (state->have < state->ncode) {
-                NEEDBITS(3);
-                state->lens[order[state->have++]] = (unsigned short)BITS(3);
-                DROPBITS(3);
-            }
-            while (state->have < 19)
-                state->lens[order[state->have++]] = 0;
-            state->next = state->codes;
-            state->lencode = (const code FAR *)(state->next);
-            state->lenbits = 7;
-            ret = inflate_table(CODES, state->lens, 19, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid code lengths set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       code lengths ok\n"));
-            state->have = 0;
-            state->mode = CODELENS;
-        case CODELENS:
-            while (state->have < state->nlen + state->ndist) {
-                for (;;) {
-                    here = state->lencode[BITS(state->lenbits)];
-                    if ((unsigned)(here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                if (here.val < 16) {
-                    DROPBITS(here.bits);
-                    state->lens[state->have++] = here.val;
-                }
-                else {
-                    if (here.val == 16) {
-                        NEEDBITS(here.bits + 2);
-                        DROPBITS(here.bits);
-                        if (state->have == 0) {
-                            strm->msg = (char *)"invalid bit length repeat";
-                            state->mode = BAD;
-                            break;
-                        }
-                        len = state->lens[state->have - 1];
-                        copy = 3 + BITS(2);
-                        DROPBITS(2);
-                    }
-                    else if (here.val == 17) {
-                        NEEDBITS(here.bits + 3);
-                        DROPBITS(here.bits);
-                        len = 0;
-                        copy = 3 + BITS(3);
-                        DROPBITS(3);
-                    }
-                    else {
-                        NEEDBITS(here.bits + 7);
-                        DROPBITS(here.bits);
-                        len = 0;
-                        copy = 11 + BITS(7);
-                        DROPBITS(7);
-                    }
-                    if (state->have + copy > state->nlen + state->ndist) {
-                        strm->msg = (char *)"invalid bit length repeat";
-                        state->mode = BAD;
-                        break;
-                    }
-                    while (copy--)
-                        state->lens[state->have++] = (unsigned short)len;
-                }
-            }
-
-            /* handle error breaks in while */
-            if (state->mode == BAD) break;
-
-            /* check for end-of-block code (better have one) */
-            if (state->lens[256] == 0) {
-                strm->msg = (char *)"invalid code -- missing end-of-block";
-                state->mode = BAD;
-                break;
-            }
-
-            /* build code tables -- note: do not change the lenbits or distbits
-               values here (9 and 6) without reading the comments in inftrees.h
-               concerning the ENOUGH constants, which depend on those values */
-            state->next = state->codes;
-            state->lencode = (const code FAR *)(state->next);
-            state->lenbits = 9;
-            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
-                                &(state->lenbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid literal/lengths set";
-                state->mode = BAD;
-                break;
-            }
-            state->distcode = (const code FAR *)(state->next);
-            state->distbits = 6;
-            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
-                            &(state->next), &(state->distbits), state->work);
-            if (ret) {
-                strm->msg = (char *)"invalid distances set";
-                state->mode = BAD;
-                break;
-            }
-            Tracev((stderr, "inflate:       codes ok\n"));
-            state->mode = LEN_;
-            if (flush == Z_TREES) goto inf_leave;
-        case LEN_:
-            state->mode = LEN;
-        case LEN:
-            if (have >= 6 && left >= 258) {
-                RESTORE();
-                inflate_fast(strm, out);
-                LOAD();
-                if (state->mode == TYPE)
-                    state->back = -1;
-                break;
-            }
-            state->back = 0;
-            for (;;) {
-                here = state->lencode[BITS(state->lenbits)];
-                if ((unsigned)(here.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if (here.op && (here.op & 0xf0) == 0) {
-                last = here;
-                for (;;) {
-                    here = state->lencode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-                state->back += last.bits;
-            }
-            DROPBITS(here.bits);
-            state->back += here.bits;
-            state->length = (unsigned)here.val;
-            if ((int)(here.op) == 0) {
-                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
-                        "inflate:         literal '%c'\n" :
-                        "inflate:         literal 0x%02x\n", here.val));
-                state->mode = LIT;
-                break;
-            }
-            if (here.op & 32) {
-                Tracevv((stderr, "inflate:         end of block\n"));
-                state->back = -1;
-                state->mode = TYPE;
-                break;
-            }
-            if (here.op & 64) {
-                strm->msg = (char *)"invalid literal/length code";
-                state->mode = BAD;
-                break;
-            }
-            state->extra = (unsigned)(here.op) & 15;
-            state->mode = LENEXT;
-        case LENEXT:
-            if (state->extra) {
-                NEEDBITS(state->extra);
-                state->length += BITS(state->extra);
-                DROPBITS(state->extra);
-                state->back += state->extra;
-            }
-            Tracevv((stderr, "inflate:         length %u\n", state->length));
-            state->was = state->length;
-            state->mode = DIST;
-        case DIST:
-            for (;;) {
-                here = state->distcode[BITS(state->distbits)];
-                if ((unsigned)(here.bits) <= bits) break;
-                PULLBYTE();
-            }
-            if ((here.op & 0xf0) == 0) {
-                last = here;
-                for (;;) {
-                    here = state->distcode[last.val +
-                            (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + here.bits) <= bits) break;
-                    PULLBYTE();
-                }
-                DROPBITS(last.bits);
-                state->back += last.bits;
-            }
-            DROPBITS(here.bits);
-            state->back += here.bits;
-            if (here.op & 64) {
-                strm->msg = (char *)"invalid distance code";
-                state->mode = BAD;
-                break;
-            }
-            state->offset = (unsigned)here.val;
-            state->extra = (unsigned)(here.op) & 15;
-            state->mode = DISTEXT;
-        case DISTEXT:
-            if (state->extra) {
-                NEEDBITS(state->extra);
-                state->offset += BITS(state->extra);
-                DROPBITS(state->extra);
-                state->back += state->extra;
-            }
-#ifdef INFLATE_STRICT
-            if (state->offset > state->dmax) {
-                strm->msg = (char *)"invalid distance too far back";
-                state->mode = BAD;
-                break;
-            }
-#endif
-            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
-            state->mode = MATCH;
-        case MATCH:
-            if (left == 0) goto inf_leave;
-            copy = out - left;
-            if (state->offset > copy) {         /* copy from window */
-                copy = state->offset - copy;
-                if (copy > state->whave) {
-                    if (state->sane) {
-                        strm->msg = (char *)"invalid distance too far back";
-                        state->mode = BAD;
-                        break;
-                    }
-#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
-                    Trace((stderr, "inflate.c too far\n"));
-                    copy -= state->whave;
-                    if (copy > state->length) copy = state->length;
-                    if (copy > left) copy = left;
-                    left -= copy;
-                    state->length -= copy;
-                    do {
-                        *put++ = 0;
-                    } while (--copy);
-                    if (state->length == 0) state->mode = LEN;
-                    break;
-#endif
-                }
-                if (copy > state->wnext) {
-                    copy -= state->wnext;
-                    from = state->window + (state->wsize - copy);
-                }
-                else
-                    from = state->window + (state->wnext - copy);
-                if (copy > state->length) copy = state->length;
-            }
-            else {                              /* copy from output */
-                from = put - state->offset;
-                copy = state->length;
-            }
-            if (copy > left) copy = left;
-            left -= copy;
-            state->length -= copy;
-            do {
-                *put++ = *from++;
-            } while (--copy);
-            if (state->length == 0) state->mode = LEN;
-            break;
-        case LIT:
-            if (left == 0) goto inf_leave;
-            *put++ = (unsigned char)(state->length);
-            left--;
-            state->mode = LEN;
-            break;
-        case CHECK:
-            if (state->wrap) {
-                NEEDBITS(32);
-                out -= left;
-                strm->total_out += out;
-                state->total += out;
-                if ((state->wrap & 4) && out)
-                    strm->adler = state->check =
-                        UPDATE(state->check, put - out, out);
-                out = left;
-                if ((state->wrap & 4) && (
-#ifdef GUNZIP
-                     state->flags ? hold :
-#endif
-                     ZSWAP32(hold)) != state->check) {
-                    strm->msg = (char *)"incorrect data check";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-                Tracev((stderr, "inflate:   check matches trailer\n"));
-            }
-#ifdef GUNZIP
-            state->mode = LENGTH;
-        case LENGTH:
-            if (state->wrap && state->flags) {
-                NEEDBITS(32);
-                if (hold != (state->total & 0xffffffffUL)) {
-                    strm->msg = (char *)"incorrect length check";
-                    state->mode = BAD;
-                    break;
-                }
-                INITBITS();
-                Tracev((stderr, "inflate:   length matches trailer\n"));
-            }
-#endif
-            state->mode = DONE;
-        case DONE:
-            ret = Z_STREAM_END;
-            goto inf_leave;
-        case BAD:
-            ret = Z_DATA_ERROR;
-            goto inf_leave;
-        case MEM:
-            return Z_MEM_ERROR;
-        case SYNC:
-        default:
-            return Z_STREAM_ERROR;
-        }
-
-    /*
-       Return from inflate(), updating the total counts and the check value.
-       If there was no progress during the inflate() call, return a buffer
-       error.  Call updatewindow() to create and/or update the window state.
-       Note: a memory error from inflate() is non-recoverable.
-     */
-  inf_leave:
-    RESTORE();
-    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
-            (state->mode < CHECK || flush != Z_FINISH)))
-        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
-            state->mode = MEM;
-            return Z_MEM_ERROR;
-        }
-    in -= strm->avail_in;
-    out -= strm->avail_out;
-    strm->total_in += in;
-    strm->total_out += out;
-    state->total += out;
-    if ((state->wrap & 4) && out)
-        strm->adler = state->check =
-            UPDATE(state->check, strm->next_out - out, out);
-    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
-                      (state->mode == TYPE ? 128 : 0) +
-                      (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
-    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
-        ret = Z_BUF_ERROR;
-    return ret;
-}
-
-int ZEXPORT inflateEnd(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-    if (inflateStateCheck(strm))
-        return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (state->window != Z_NULL) ZFREE(strm, state->window);
-    ZFREE(strm, strm->state);
-    strm->state = Z_NULL;
-    Tracev((stderr, "inflate: end\n"));
-    return Z_OK;
-}
-
-int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-Bytef *dictionary;
-uInt *dictLength;
-{
-    struct inflate_state FAR *state;
-
-    /* check state */
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-
-    /* copy dictionary */
-    if (state->whave && dictionary != Z_NULL) {
-        zmemcpy(dictionary, state->window + state->wnext,
-                state->whave - state->wnext);
-        zmemcpy(dictionary + state->whave - state->wnext,
-                state->window, state->wnext);
-    }
-    if (dictLength != Z_NULL)
-        *dictLength = state->whave;
-    return Z_OK;
-}
-
-int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-const Bytef *dictionary;
-uInt dictLength;
-{
-    struct inflate_state FAR *state;
-    unsigned long dictid;
-    int ret;
-
-    /* check state */
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (state->wrap != 0 && state->mode != DICT)
-        return Z_STREAM_ERROR;
-
-    /* check for correct dictionary identifier */
-    if (state->mode == DICT) {
-        dictid = adler32(0L, Z_NULL, 0);
-        dictid = adler32(dictid, dictionary, dictLength);
-        if (dictid != state->check)
-            return Z_DATA_ERROR;
-    }
-
-    /* copy dictionary to window using updatewindow(), which will amend the
-       existing dictionary if appropriate */
-    ret = updatewindow(strm, dictionary + dictLength, dictLength);
-    if (ret) {
-        state->mode = MEM;
-        return Z_MEM_ERROR;
-    }
-    state->havedict = 1;
-    Tracev((stderr, "inflate:   dictionary set\n"));
-    return Z_OK;
-}
-
-int ZEXPORT inflateGetHeader(strm, head)
-z_streamp strm;
-gz_headerp head;
-{
-    struct inflate_state FAR *state;
-
-    /* check state */
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
-
-    /* save header structure */
-    state->head = head;
-    head->done = 0;
-    return Z_OK;
-}
-
-/*
-   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
-   or when out of input.  When called, *have is the number of pattern bytes
-   found in order so far, in 0..3.  On return *have is updated to the new
-   state.  If on return *have equals four, then the pattern was found and the
-   return value is how many bytes were read including the last byte of the
-   pattern.  If *have is less than four, then the pattern has not been found
-   yet and the return value is len.  In the latter case, syncsearch() can be
-   called again with more data and the *have state.  *have is initialized to
-   zero for the first call.
- */
-local unsigned syncsearch(have, buf, len)
-unsigned FAR *have;
-const unsigned char FAR *buf;
-unsigned len;
-{
-    unsigned got;
-    unsigned next;
-
-    got = *have;
-    next = 0;
-    while (next < len && got < 4) {
-        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
-            got++;
-        else if (buf[next])
-            got = 0;
-        else
-            got = 4 - got;
-        next++;
-    }
-    *have = got;
-    return next;
-}
-
-int ZEXPORT inflateSync(strm)
-z_streamp strm;
-{
-    unsigned len;               /* number of bytes to look at or looked at */
-    unsigned long in, out;      /* temporary to save total_in and total_out */
-    unsigned char buf[4];       /* to restore bit buffer to byte string */
-    struct inflate_state FAR *state;
-
-    /* check parameters */
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
-    /* if first time, start search in bit buffer */
-    if (state->mode != SYNC) {
-        state->mode = SYNC;
-        state->hold <<= state->bits & 7;
-        state->bits -= state->bits & 7;
-        len = 0;
-        while (state->bits >= 8) {
-            buf[len++] = (unsigned char)(state->hold);
-            state->hold >>= 8;
-            state->bits -= 8;
-        }
-        state->have = 0;
-        syncsearch(&(state->have), buf, len);
-    }
-
-    /* search available input */
-    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
-    strm->avail_in -= len;
-    strm->next_in += len;
-    strm->total_in += len;
-
-    /* return no joy or set up to restart inflate() on a new block */
-    if (state->have != 4) return Z_DATA_ERROR;
-    in = strm->total_in;  out = strm->total_out;
-    inflateReset(strm);
-    strm->total_in = in;  strm->total_out = out;
-    state->mode = TYPE;
-    return Z_OK;
-}
-
-/*
-   Returns true if inflate is currently at the end of a block generated by
-   Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
-   implementation to provide an additional safety check. PPP uses
-   Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
-   block. When decompressing, PPP checks that at the end of input packet,
-   inflate is waiting for these length bytes.
- */
-int ZEXPORT inflateSyncPoint(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    return state->mode == STORED && state->bits == 0;
-}
-
-int ZEXPORT inflateCopy(dest, source)
-z_streamp dest;
-z_streamp source;
-{
-    struct inflate_state FAR *state;
-    struct inflate_state FAR *copy;
-    unsigned char FAR *window;
-    unsigned wsize;
-
-    /* check input */
-    if (inflateStateCheck(source) || dest == Z_NULL)
-        return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)source->state;
-
-    /* allocate space */
-    copy = (struct inflate_state FAR *)
-           ZALLOC(source, 1, sizeof(struct inflate_state));
-    if (copy == Z_NULL) return Z_MEM_ERROR;
-    window = Z_NULL;
-    if (state->window != Z_NULL) {
-        window = (unsigned char FAR *)
-                 ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
-        if (window == Z_NULL) {
-            ZFREE(source, copy);
-            return Z_MEM_ERROR;
-        }
-    }
-
-    /* copy state */
-    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
-    zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
-    copy->strm = dest;
-    if (state->lencode >= state->codes &&
-        state->lencode <= state->codes + ENOUGH - 1) {
-        copy->lencode = copy->codes + (state->lencode - state->codes);
-        copy->distcode = copy->codes + (state->distcode - state->codes);
-    }
-    copy->next = copy->codes + (state->next - state->codes);
-    if (window != Z_NULL) {
-        wsize = 1U << state->wbits;
-        zmemcpy(window, state->window, wsize);
-    }
-    copy->window = window;
-    dest->state = (struct internal_state FAR *)copy;
-    return Z_OK;
-}
-
-int ZEXPORT inflateUndermine(strm, subvert)
-z_streamp strm;
-int subvert;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
-    state->sane = !subvert;
-    return Z_OK;
-#else
-    (void)subvert;
-    state->sane = 1;
-    return Z_DATA_ERROR;
-#endif
-}
-
-int ZEXPORT inflateValidate(strm, check)
-z_streamp strm;
-int check;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
-    state = (struct inflate_state FAR *)strm->state;
-    if (check)
-        state->wrap |= 4;
-    else
-        state->wrap &= ~4;
-    return Z_OK;
-}
-
-long ZEXPORT inflateMark(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-
-    if (inflateStateCheck(strm))
-        return -(1L << 16);
-    state = (struct inflate_state FAR *)strm->state;
-    return (long)(((unsigned long)((long)state->back)) << 16) +
-        (state->mode == COPY ? state->length :
-            (state->mode == MATCH ? state->was - state->length : 0));
-}
-
-unsigned long ZEXPORT inflateCodesUsed(strm)
-z_streamp strm;
-{
-    struct inflate_state FAR *state;
-    if (inflateStateCheck(strm)) return (unsigned long)-1;
-    state = (struct inflate_state FAR *)strm->state;
-    return (unsigned long)(state->next - state->codes);
-}
diff --git a/dep/zlib/src/inflate.h b/dep/zlib/src/inflate.h
deleted file mode 100644
index a46cce6b6..000000000
--- a/dep/zlib/src/inflate.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* inflate.h -- internal inflate state definition
- * Copyright (C) 1995-2016 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-/* define NO_GZIP when compiling if you want to disable gzip header and
-   trailer decoding by inflate().  NO_GZIP would be used to avoid linking in
-   the crc code when it is not needed.  For shared libraries, gzip decoding
-   should be left enabled. */
-#ifndef NO_GZIP
-#  define GUNZIP
-#endif
-
-/* Possible inflate modes between inflate() calls */
-typedef enum {
-    HEAD = 16180,   /* i: waiting for magic header */
-    FLAGS,      /* i: waiting for method and flags (gzip) */
-    TIME,       /* i: waiting for modification time (gzip) */
-    OS,         /* i: waiting for extra flags and operating system (gzip) */
-    EXLEN,      /* i: waiting for extra length (gzip) */
-    EXTRA,      /* i: waiting for extra bytes (gzip) */
-    NAME,       /* i: waiting for end of file name (gzip) */
-    COMMENT,    /* i: waiting for end of comment (gzip) */
-    HCRC,       /* i: waiting for header crc (gzip) */
-    DICTID,     /* i: waiting for dictionary check value */
-    DICT,       /* waiting for inflateSetDictionary() call */
-        TYPE,       /* i: waiting for type bits, including last-flag bit */
-        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
-        STORED,     /* i: waiting for stored size (length and complement) */
-        COPY_,      /* i/o: same as COPY below, but only first time in */
-        COPY,       /* i/o: waiting for input or output to copy stored block */
-        TABLE,      /* i: waiting for dynamic block table lengths */
-        LENLENS,    /* i: waiting for code length code lengths */
-        CODELENS,   /* i: waiting for length/lit and distance code lengths */
-            LEN_,       /* i: same as LEN below, but only first time in */
-            LEN,        /* i: waiting for length/lit/eob code */
-            LENEXT,     /* i: waiting for length extra bits */
-            DIST,       /* i: waiting for distance code */
-            DISTEXT,    /* i: waiting for distance extra bits */
-            MATCH,      /* o: waiting for output space to copy string */
-            LIT,        /* o: waiting for output space to write literal */
-    CHECK,      /* i: waiting for 32-bit check value */
-    LENGTH,     /* i: waiting for 32-bit length (gzip) */
-    DONE,       /* finished check, done -- remain here until reset */
-    BAD,        /* got a data error -- remain here until reset */
-    MEM,        /* got an inflate() memory error -- remain here until reset */
-    SYNC        /* looking for synchronization bytes to restart inflate() */
-} inflate_mode;
-
-/*
-    State transitions between above modes -
-
-    (most modes can go to BAD or MEM on error -- not shown for clarity)
-
-    Process header:
-        HEAD -> (gzip) or (zlib) or (raw)
-        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
-                  HCRC -> TYPE
-        (zlib) -> DICTID or TYPE
-        DICTID -> DICT -> TYPE
-        (raw) -> TYPEDO
-    Read deflate blocks:
-            TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
-            STORED -> COPY_ -> COPY -> TYPE
-            TABLE -> LENLENS -> CODELENS -> LEN_
-            LEN_ -> LEN
-    Read deflate codes in fixed or dynamic block:
-                LEN -> LENEXT or LIT or TYPE
-                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
-                LIT -> LEN
-    Process trailer:
-        CHECK -> LENGTH -> DONE
- */
-
-/* State maintained between inflate() calls -- approximately 7K bytes, not
-   including the allocated sliding window, which is up to 32K bytes. */
-struct inflate_state {
-    z_streamp strm;             /* pointer back to this zlib stream */
-    inflate_mode mode;          /* current inflate mode */
-    int last;                   /* true if processing last block */
-    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip,
-                                   bit 2 true to validate check value */
-    int havedict;               /* true if dictionary provided */
-    int flags;                  /* gzip header method and flags (0 if zlib) */
-    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
-    unsigned long check;        /* protected copy of check value */
-    unsigned long total;        /* protected copy of output count */
-    gz_headerp head;            /* where to save gzip header information */
-        /* sliding window */
-    unsigned wbits;             /* log base 2 of requested window size */
-    unsigned wsize;             /* window size or zero if not using window */
-    unsigned whave;             /* valid bytes in the window */
-    unsigned wnext;             /* window write index */
-    unsigned char FAR *window;  /* allocated sliding window, if needed */
-        /* bit accumulator */
-    unsigned long hold;         /* input bit accumulator */
-    unsigned bits;              /* number of bits in "in" */
-        /* for string and stored block copying */
-    unsigned length;            /* literal or length of data to copy */
-    unsigned offset;            /* distance back to copy string from */
-        /* for table and code decoding */
-    unsigned extra;             /* extra bits needed */
-        /* fixed and dynamic code tables */
-    code const FAR *lencode;    /* starting table for length/literal codes */
-    code const FAR *distcode;   /* starting table for distance codes */
-    unsigned lenbits;           /* index bits for lencode */
-    unsigned distbits;          /* index bits for distcode */
-        /* dynamic table building */
-    unsigned ncode;             /* number of code length code lengths */
-    unsigned nlen;              /* number of length code lengths */
-    unsigned ndist;             /* number of distance code lengths */
-    unsigned have;              /* number of code lengths in lens[] */
-    code FAR *next;             /* next available space in codes[] */
-    unsigned short lens[320];   /* temporary storage for code lengths */
-    unsigned short work[288];   /* work area for code table building */
-    code codes[ENOUGH];         /* space for code tables */
-    int sane;                   /* if false, allow invalid distance too far */
-    int back;                   /* bits back of last unprocessed length/lit */
-    unsigned was;               /* initial length of match */
-};
diff --git a/dep/zlib/src/inftrees.c b/dep/zlib/src/inftrees.c
deleted file mode 100644
index 2ea08fc13..000000000
--- a/dep/zlib/src/inftrees.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2017 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-
-#define MAXBITS 15
-
-const char inflate_copyright[] =
-   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
-/*
-  If you use the zlib library in a product, an acknowledgment is welcome
-  in the documentation of your product. If for some reason you cannot
-  include such an acknowledgment, I would appreciate that you keep this
-  copyright string in the executable of your product.
- */
-
-/*
-   Build a set of tables to decode the provided canonical Huffman code.
-   The code lengths are lens[0..codes-1].  The result starts at *table,
-   whose indices are 0..2^bits-1.  work is a writable array of at least
-   lens shorts, which is used as a work area.  type is the type of code
-   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
-   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
-   on return points to the next available entry's address.  bits is the
-   requested root table index bits, and on return it is the actual root
-   table index bits.  It will differ if the request is greater than the
-   longest code or if it is less than the shortest code.
- */
-int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
-codetype type;
-unsigned short FAR *lens;
-unsigned codes;
-code FAR * FAR *table;
-unsigned FAR *bits;
-unsigned short FAR *work;
-{
-    unsigned len;               /* a code's length in bits */
-    unsigned sym;               /* index of code symbols */
-    unsigned min, max;          /* minimum and maximum code lengths */
-    unsigned root;              /* number of index bits for root table */
-    unsigned curr;              /* number of index bits for current table */
-    unsigned drop;              /* code bits to drop for sub-table */
-    int left;                   /* number of prefix codes available */
-    unsigned used;              /* code entries in table used */
-    unsigned huff;              /* Huffman code */
-    unsigned incr;              /* for incrementing code, index */
-    unsigned fill;              /* index for replicating entries */
-    unsigned low;               /* low bits for current root entry */
-    unsigned mask;              /* mask for low root bits */
-    code here;                  /* table entry for duplication */
-    code FAR *next;             /* next available space in table */
-    const unsigned short FAR *base;     /* base value table to use */
-    const unsigned short FAR *extra;    /* extra bits table to use */
-    unsigned match;             /* use base and extra for symbol >= match */
-    unsigned short count[MAXBITS+1];    /* number of codes of each length */
-    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
-    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
-        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
-        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
-    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
-        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
-    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
-        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
-        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
-        8193, 12289, 16385, 24577, 0, 0};
-    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
-        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
-        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
-        28, 28, 29, 29, 64, 64};
-
-    /*
-       Process a set of code lengths to create a canonical Huffman code.  The
-       code lengths are lens[0..codes-1].  Each length corresponds to the
-       symbols 0..codes-1.  The Huffman code is generated by first sorting the
-       symbols by length from short to long, and retaining the symbol order
-       for codes with equal lengths.  Then the code starts with all zero bits
-       for the first code of the shortest length, and the codes are integer
-       increments for the same length, and zeros are appended as the length
-       increases.  For the deflate format, these bits are stored backwards
-       from their more natural integer increment ordering, and so when the
-       decoding tables are built in the large loop below, the integer codes
-       are incremented backwards.
-
-       This routine assumes, but does not check, that all of the entries in
-       lens[] are in the range 0..MAXBITS.  The caller must assure this.
-       1..MAXBITS is interpreted as that code length.  zero means that that
-       symbol does not occur in this code.
-
-       The codes are sorted by computing a count of codes for each length,
-       creating from that a table of starting indices for each length in the
-       sorted table, and then entering the symbols in order in the sorted
-       table.  The sorted table is work[], with that space being provided by
-       the caller.
-
-       The length counts are used for other purposes as well, i.e. finding
-       the minimum and maximum length codes, determining if there are any
-       codes at all, checking for a valid set of lengths, and looking ahead
-       at length counts to determine sub-table sizes when building the
-       decoding tables.
-     */
-
-    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
-    for (len = 0; len <= MAXBITS; len++)
-        count[len] = 0;
-    for (sym = 0; sym < codes; sym++)
-        count[lens[sym]]++;
-
-    /* bound code lengths, force root to be within code lengths */
-    root = *bits;
-    for (max = MAXBITS; max >= 1; max--)
-        if (count[max] != 0) break;
-    if (root > max) root = max;
-    if (max == 0) {                     /* no symbols to code at all */
-        here.op = (unsigned char)64;    /* invalid code marker */
-        here.bits = (unsigned char)1;
-        here.val = (unsigned short)0;
-        *(*table)++ = here;             /* make a table to force an error */
-        *(*table)++ = here;
-        *bits = 1;
-        return 0;     /* no symbols, but wait for decoding to report error */
-    }
-    for (min = 1; min < max; min++)
-        if (count[min] != 0) break;
-    if (root < min) root = min;
-
-    /* check for an over-subscribed or incomplete set of lengths */
-    left = 1;
-    for (len = 1; len <= MAXBITS; len++) {
-        left <<= 1;
-        left -= count[len];
-        if (left < 0) return -1;        /* over-subscribed */
-    }
-    if (left > 0 && (type == CODES || max != 1))
-        return -1;                      /* incomplete set */
-
-    /* generate offsets into symbol table for each length for sorting */
-    offs[1] = 0;
-    for (len = 1; len < MAXBITS; len++)
-        offs[len + 1] = offs[len] + count[len];
-
-    /* sort symbols by length, by symbol order within each length */
-    for (sym = 0; sym < codes; sym++)
-        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
-
-    /*
-       Create and fill in decoding tables.  In this loop, the table being
-       filled is at next and has curr index bits.  The code being used is huff
-       with length len.  That code is converted to an index by dropping drop
-       bits off of the bottom.  For codes where len is less than drop + curr,
-       those top drop + curr - len bits are incremented through all values to
-       fill the table with replicated entries.
-
-       root is the number of index bits for the root table.  When len exceeds
-       root, sub-tables are created pointed to by the root entry with an index
-       of the low root bits of huff.  This is saved in low to check for when a
-       new sub-table should be started.  drop is zero when the root table is
-       being filled, and drop is root when sub-tables are being filled.
-
-       When a new sub-table is needed, it is necessary to look ahead in the
-       code lengths to determine what size sub-table is needed.  The length
-       counts are used for this, and so count[] is decremented as codes are
-       entered in the tables.
-
-       used keeps track of how many table entries have been allocated from the
-       provided *table space.  It is checked for LENS and DIST tables against
-       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
-       the initial root table size constants.  See the comments in inftrees.h
-       for more information.
-
-       sym increments through all symbols, and the loop terminates when
-       all codes of length max, i.e. all codes, have been processed.  This
-       routine permits incomplete codes, so another loop after this one fills
-       in the rest of the decoding tables with invalid code markers.
-     */
-
-    /* set up for code type */
-    switch (type) {
-    case CODES:
-        base = extra = work;    /* dummy value--not used */
-        match = 20;
-        break;
-    case LENS:
-        base = lbase;
-        extra = lext;
-        match = 257;
-        break;
-    default:    /* DISTS */
-        base = dbase;
-        extra = dext;
-        match = 0;
-    }
-
-    /* initialize state for loop */
-    huff = 0;                   /* starting code */
-    sym = 0;                    /* starting code symbol */
-    len = min;                  /* starting code length */
-    next = *table;              /* current table to fill in */
-    curr = root;                /* current table index bits */
-    drop = 0;                   /* current bits to drop from code for index */
-    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
-    used = 1U << root;          /* use root table entries */
-    mask = used - 1;            /* mask for comparing low */
-
-    /* check available table space */
-    if ((type == LENS && used > ENOUGH_LENS) ||
-        (type == DISTS && used > ENOUGH_DISTS))
-        return 1;
-
-    /* process all codes and make table entries */
-    for (;;) {
-        /* create table entry */
-        here.bits = (unsigned char)(len - drop);
-        if (work[sym] + 1U < match) {
-            here.op = (unsigned char)0;
-            here.val = work[sym];
-        }
-        else if (work[sym] >= match) {
-            here.op = (unsigned char)(extra[work[sym] - match]);
-            here.val = base[work[sym] - match];
-        }
-        else {
-            here.op = (unsigned char)(32 + 64);         /* end of block */
-            here.val = 0;
-        }
-
-        /* replicate for those indices with low len bits equal to huff */
-        incr = 1U << (len - drop);
-        fill = 1U << curr;
-        min = fill;                 /* save offset to next table */
-        do {
-            fill -= incr;
-            next[(huff >> drop) + fill] = here;
-        } while (fill != 0);
-
-        /* backwards increment the len-bit code huff */
-        incr = 1U << (len - 1);
-        while (huff & incr)
-            incr >>= 1;
-        if (incr != 0) {
-            huff &= incr - 1;
-            huff += incr;
-        }
-        else
-            huff = 0;
-
-        /* go to next symbol, update count, len */
-        sym++;
-        if (--(count[len]) == 0) {
-            if (len == max) break;
-            len = lens[work[sym]];
-        }
-
-        /* create new sub-table if needed */
-        if (len > root && (huff & mask) != low) {
-            /* if first time, transition to sub-tables */
-            if (drop == 0)
-                drop = root;
-
-            /* increment past last table */
-            next += min;            /* here min is 1 << curr */
-
-            /* determine length of next table */
-            curr = len - drop;
-            left = (int)(1 << curr);
-            while (curr + drop < max) {
-                left -= count[curr + drop];
-                if (left <= 0) break;
-                curr++;
-                left <<= 1;
-            }
-
-            /* check for enough space */
-            used += 1U << curr;
-            if ((type == LENS && used > ENOUGH_LENS) ||
-                (type == DISTS && used > ENOUGH_DISTS))
-                return 1;
-
-            /* point entry in root table to sub-table */
-            low = huff & mask;
-            (*table)[low].op = (unsigned char)curr;
-            (*table)[low].bits = (unsigned char)root;
-            (*table)[low].val = (unsigned short)(next - *table);
-        }
-    }
-
-    /* fill in remaining table entry if code is incomplete (guaranteed to have
-       at most one remaining entry, since if the code is incomplete, the
-       maximum code length that was allowed to get this far is one bit) */
-    if (huff != 0) {
-        here.op = (unsigned char)64;            /* invalid code marker */
-        here.bits = (unsigned char)(len - drop);
-        here.val = (unsigned short)0;
-        next[huff] = here;
-    }
-
-    /* set return parameters */
-    *table += used;
-    *bits = root;
-    return 0;
-}
diff --git a/dep/zlib/src/inftrees.h b/dep/zlib/src/inftrees.h
deleted file mode 100644
index baa53a0b1..000000000
--- a/dep/zlib/src/inftrees.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* inftrees.h -- header to use inftrees.c
- * Copyright (C) 1995-2005, 2010 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-/* Structure for decoding tables.  Each entry provides either the
-   information needed to do the operation requested by the code that
-   indexed that table entry, or it provides a pointer to another
-   table that indexes more bits of the code.  op indicates whether
-   the entry is a pointer to another table, a literal, a length or
-   distance, an end-of-block, or an invalid code.  For a table
-   pointer, the low four bits of op is the number of index bits of
-   that table.  For a length or distance, the low four bits of op
-   is the number of extra bits to get after the code.  bits is
-   the number of bits in this code or part of the code to drop off
-   of the bit buffer.  val is the actual byte to output in the case
-   of a literal, the base length or distance, or the offset from
-   the current table to the next table.  Each entry is four bytes. */
-typedef struct {
-    unsigned char op;           /* operation, extra bits, table bits */
-    unsigned char bits;         /* bits in this part of the code */
-    unsigned short val;         /* offset in table or code value */
-} code;
-
-/* op values as set by inflate_table():
-    00000000 - literal
-    0000tttt - table link, tttt != 0 is the number of table index bits
-    0001eeee - length or distance, eeee is the number of extra bits
-    01100000 - end of block
-    01000000 - invalid code
- */
-
-/* Maximum size of the dynamic table.  The maximum number of code structures is
-   1444, which is the sum of 852 for literal/length codes and 592 for distance
-   codes.  These values were found by exhaustive searches using the program
-   examples/enough.c found in the zlib distribtution.  The arguments to that
-   program are the number of symbols, the initial root table size, and the
-   maximum bit length of a code.  "enough 286 9 15" for literal/length codes
-   returns returns 852, and "enough 30 6 15" for distance codes returns 592.
-   The initial root table size (9 or 6) is found in the fifth argument of the
-   inflate_table() calls in inflate.c and infback.c.  If the root table size is
-   changed, then these maximum sizes would be need to be recalculated and
-   updated. */
-#define ENOUGH_LENS 852
-#define ENOUGH_DISTS 592
-#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
-
-/* Type of code to build for inflate_table() */
-typedef enum {
-    CODES,
-    LENS,
-    DISTS
-} codetype;
-
-int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens,
-                             unsigned codes, code FAR * FAR *table,
-                             unsigned FAR *bits, unsigned short FAR *work));
diff --git a/dep/zlib/src/trees.c b/dep/zlib/src/trees.c
deleted file mode 100644
index 50cf4b457..000000000
--- a/dep/zlib/src/trees.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-2017 Jean-loup Gailly
- * detect_data_type() function provided freely by Cosmin Truta, 2006
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- *  ALGORITHM
- *
- *      The "deflation" process uses several Huffman trees. The more
- *      common source values are represented by shorter bit sequences.
- *
- *      Each code tree is stored in a compressed form which is itself
- * a Huffman encoding of the lengths of all the code strings (in
- * ascending order by source values).  The actual code strings are
- * reconstructed from the lengths in the inflate process, as described
- * in the deflate specification.
- *
- *  REFERENCES
- *
- *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
- *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
- *
- *      Storer, James A.
- *          Data Compression:  Methods and Theory, pp. 49-50.
- *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
- *
- *      Sedgewick, R.
- *          Algorithms, p290.
- *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
- */
-
-/* @(#) $Id$ */
-
-/* #define GEN_TREES_H */
-
-#include "deflate.h"
-
-#ifdef ZLIB_DEBUG
-#  include <ctype.h>
-#endif
-
-/* ===========================================================================
- * Constants
- */
-
-#define MAX_BL_BITS 7
-/* Bit length codes must not exceed MAX_BL_BITS bits */
-
-#define END_BLOCK 256
-/* end of block literal code */
-
-#define REP_3_6      16
-/* repeat previous bit length 3-6 times (2 bits of repeat count) */
-
-#define REPZ_3_10    17
-/* repeat a zero length 3-10 times  (3 bits of repeat count) */
-
-#define REPZ_11_138  18
-/* repeat a zero length 11-138 times  (7 bits of repeat count) */
-
-local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
-   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
-
-local const int extra_dbits[D_CODES] /* extra bits for each distance code */
-   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-
-local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
-   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
-
-local const uch bl_order[BL_CODES]
-   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
-/* The lengths of the bit length codes are sent in order of decreasing
- * probability, to avoid transmitting the lengths for unused bit length codes.
- */
-
-/* ===========================================================================
- * Local data. These are initialized only once.
- */
-
-#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
-
-#if defined(GEN_TREES_H) || !defined(STDC)
-/* non ANSI compilers may not accept trees.h */
-
-local ct_data static_ltree[L_CODES+2];
-/* The static literal tree. Since the bit lengths are imposed, there is no
- * need for the L_CODES extra codes used during heap construction. However
- * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
- * below).
- */
-
-local ct_data static_dtree[D_CODES];
-/* The static distance tree. (Actually a trivial tree since all codes use
- * 5 bits.)
- */
-
-uch _dist_code[DIST_CODE_LEN];
-/* Distance codes. The first 256 values correspond to the distances
- * 3 .. 258, the last 256 values correspond to the top 8 bits of
- * the 15 bit distances.
- */
-
-uch _length_code[MAX_MATCH-MIN_MATCH+1];
-/* length code for each normalized match length (0 == MIN_MATCH) */
-
-local int base_length[LENGTH_CODES];
-/* First normalized length for each code (0 = MIN_MATCH) */
-
-local int base_dist[D_CODES];
-/* First normalized distance for each code (0 = distance of 1) */
-
-#else
-#  include "trees.h"
-#endif /* GEN_TREES_H */
-
-struct static_tree_desc_s {
-    const ct_data *static_tree;  /* static tree or NULL */
-    const intf *extra_bits;      /* extra bits for each code or NULL */
-    int     extra_base;          /* base index for extra_bits */
-    int     elems;               /* max number of elements in the tree */
-    int     max_length;          /* max bit length for the codes */
-};
-
-local const static_tree_desc  static_l_desc =
-{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
-
-local const static_tree_desc  static_d_desc =
-{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
-
-local const static_tree_desc  static_bl_desc =
-{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
-
-/* ===========================================================================
- * Local (static) routines in this file.
- */
-
-local void tr_static_init OF((void));
-local void init_block     OF((deflate_state *s));
-local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
-local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
-local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
-local void build_tree     OF((deflate_state *s, tree_desc *desc));
-local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local int  build_bl_tree  OF((deflate_state *s));
-local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
-                              int blcodes));
-local void compress_block OF((deflate_state *s, const ct_data *ltree,
-                              const ct_data *dtree));
-local int  detect_data_type OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned value, int length));
-local void bi_windup      OF((deflate_state *s));
-local void bi_flush       OF((deflate_state *s));
-
-#ifdef GEN_TREES_H
-local void gen_trees_header OF((void));
-#endif
-
-#ifndef ZLIB_DEBUG
-#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
-   /* Send a code of the given tree. c and tree must not have side effects */
-
-#else /* !ZLIB_DEBUG */
-#  define send_code(s, c, tree) \
-     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
-       send_bits(s, tree[c].Code, tree[c].Len); }
-#endif
-
-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
-    put_byte(s, (uch)((w) & 0xff)); \
-    put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
-/* ===========================================================================
- * Send a value on a given number of bits.
- * IN assertion: length <= 16 and value fits in length bits.
- */
-#ifdef ZLIB_DEBUG
-local void send_bits      OF((deflate_state *s, int value, int length));
-
-local void send_bits(s, value, length)
-    deflate_state *s;
-    int value;  /* value to send */
-    int length; /* number of bits */
-{
-    Tracevv((stderr," l %2d v %4x ", length, value));
-    Assert(length > 0 && length <= 15, "invalid length");
-    s->bits_sent += (ulg)length;
-
-    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
-     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
-     * unused bits in value.
-     */
-    if (s->bi_valid > (int)Buf_size - length) {
-        s->bi_buf |= (ush)value << s->bi_valid;
-        put_short(s, s->bi_buf);
-        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
-        s->bi_valid += length - Buf_size;
-    } else {
-        s->bi_buf |= (ush)value << s->bi_valid;
-        s->bi_valid += length;
-    }
-}
-#else /* !ZLIB_DEBUG */
-
-#define send_bits(s, value, length) \
-{ int len = length;\
-  if (s->bi_valid > (int)Buf_size - len) {\
-    int val = (int)value;\
-    s->bi_buf |= (ush)val << s->bi_valid;\
-    put_short(s, s->bi_buf);\
-    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
-    s->bi_valid += len - Buf_size;\
-  } else {\
-    s->bi_buf |= (ush)(value) << s->bi_valid;\
-    s->bi_valid += len;\
-  }\
-}
-#endif /* ZLIB_DEBUG */
-
-
-/* the arguments must not have side effects */
-
-/* ===========================================================================
- * Initialize the various 'constant' tables.
- */
-local void tr_static_init()
-{
-#if defined(GEN_TREES_H) || !defined(STDC)
-    static int static_init_done = 0;
-    int n;        /* iterates over tree elements */
-    int bits;     /* bit counter */
-    int length;   /* length value */
-    int code;     /* code value */
-    int dist;     /* distance index */
-    ush bl_count[MAX_BITS+1];
-    /* number of codes at each bit length for an optimal tree */
-
-    if (static_init_done) return;
-
-    /* For some embedded targets, global variables are not initialized: */
-#ifdef NO_INIT_GLOBAL_POINTERS
-    static_l_desc.static_tree = static_ltree;
-    static_l_desc.extra_bits = extra_lbits;
-    static_d_desc.static_tree = static_dtree;
-    static_d_desc.extra_bits = extra_dbits;
-    static_bl_desc.extra_bits = extra_blbits;
-#endif
-
-    /* Initialize the mapping length (0..255) -> length code (0..28) */
-    length = 0;
-    for (code = 0; code < LENGTH_CODES-1; code++) {
-        base_length[code] = length;
-        for (n = 0; n < (1<<extra_lbits[code]); n++) {
-            _length_code[length++] = (uch)code;
-        }
-    }
-    Assert (length == 256, "tr_static_init: length != 256");
-    /* Note that the length 255 (match length 258) can be represented
-     * in two different ways: code 284 + 5 bits or code 285, so we
-     * overwrite length_code[255] to use the best encoding:
-     */
-    _length_code[length-1] = (uch)code;
-
-    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
-    dist = 0;
-    for (code = 0 ; code < 16; code++) {
-        base_dist[code] = dist;
-        for (n = 0; n < (1<<extra_dbits[code]); n++) {
-            _dist_code[dist++] = (uch)code;
-        }
-    }
-    Assert (dist == 256, "tr_static_init: dist != 256");
-    dist >>= 7; /* from now on, all distances are divided by 128 */
-    for ( ; code < D_CODES; code++) {
-        base_dist[code] = dist << 7;
-        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
-            _dist_code[256 + dist++] = (uch)code;
-        }
-    }
-    Assert (dist == 256, "tr_static_init: 256+dist != 512");
-
-    /* Construct the codes of the static literal tree */
-    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
-    n = 0;
-    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
-    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
-    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
-    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
-    /* Codes 286 and 287 do not exist, but we must include them in the
-     * tree construction to get a canonical Huffman tree (longest code
-     * all ones)
-     */
-    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
-
-    /* The static distance tree is trivial: */
-    for (n = 0; n < D_CODES; n++) {
-        static_dtree[n].Len = 5;
-        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
-    }
-    static_init_done = 1;
-
-#  ifdef GEN_TREES_H
-    gen_trees_header();
-#  endif
-#endif /* defined(GEN_TREES_H) || !defined(STDC) */
-}
-
-/* ===========================================================================
- * Genererate the file trees.h describing the static trees.
- */
-#ifdef GEN_TREES_H
-#  ifndef ZLIB_DEBUG
-#    include <stdio.h>
-#  endif
-
-#  define SEPARATOR(i, last, width) \
-      ((i) == (last)? "\n};\n\n" :    \
-       ((i) % (width) == (width)-1 ? ",\n" : ", "))
-
-void gen_trees_header()
-{
-    FILE *header = fopen("trees.h", "w");
-    int i;
-
-    Assert (header != NULL, "Can't open trees.h");
-    fprintf(header,
-            "/* header created automatically with -DGEN_TREES_H */\n\n");
-
-    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
-    for (i = 0; i < L_CODES+2; i++) {
-        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
-                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
-    }
-
-    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
-    for (i = 0; i < D_CODES; i++) {
-        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
-                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
-    }
-
-    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
-    for (i = 0; i < DIST_CODE_LEN; i++) {
-        fprintf(header, "%2u%s", _dist_code[i],
-                SEPARATOR(i, DIST_CODE_LEN-1, 20));
-    }
-
-    fprintf(header,
-        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
-    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
-        fprintf(header, "%2u%s", _length_code[i],
-                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
-    }
-
-    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
-    for (i = 0; i < LENGTH_CODES; i++) {
-        fprintf(header, "%1u%s", base_length[i],
-                SEPARATOR(i, LENGTH_CODES-1, 20));
-    }
-
-    fprintf(header, "local const int base_dist[D_CODES] = {\n");
-    for (i = 0; i < D_CODES; i++) {
-        fprintf(header, "%5u%s", base_dist[i],
-                SEPARATOR(i, D_CODES-1, 10));
-    }
-
-    fclose(header);
-}
-#endif /* GEN_TREES_H */
-
-/* ===========================================================================
- * Initialize the tree data structures for a new zlib stream.
- */
-void ZLIB_INTERNAL _tr_init(s)
-    deflate_state *s;
-{
-    tr_static_init();
-
-    s->l_desc.dyn_tree = s->dyn_ltree;
-    s->l_desc.stat_desc = &static_l_desc;
-
-    s->d_desc.dyn_tree = s->dyn_dtree;
-    s->d_desc.stat_desc = &static_d_desc;
-
-    s->bl_desc.dyn_tree = s->bl_tree;
-    s->bl_desc.stat_desc = &static_bl_desc;
-
-    s->bi_buf = 0;
-    s->bi_valid = 0;
-#ifdef ZLIB_DEBUG
-    s->compressed_len = 0L;
-    s->bits_sent = 0L;
-#endif
-
-    /* Initialize the first block of the first file: */
-    init_block(s);
-}
-
-/* ===========================================================================
- * Initialize a new block.
- */
-local void init_block(s)
-    deflate_state *s;
-{
-    int n; /* iterates over tree elements */
-
-    /* Initialize the trees. */
-    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
-    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
-    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-
-    s->dyn_ltree[END_BLOCK].Freq = 1;
-    s->opt_len = s->static_len = 0L;
-    s->last_lit = s->matches = 0;
-}
-
-#define SMALLEST 1
-/* Index within the heap array of least frequent node in the Huffman tree */
-
-
-/* ===========================================================================
- * Remove the smallest element from the heap and recreate the heap with
- * one less element. Updates heap and heap_len.
- */
-#define pqremove(s, tree, top) \
-{\
-    top = s->heap[SMALLEST]; \
-    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
-    pqdownheap(s, tree, SMALLEST); \
-}
-
-/* ===========================================================================
- * Compares to subtrees, using the tree depth as tie breaker when
- * the subtrees have equal frequency. This minimizes the worst case length.
- */
-#define smaller(tree, n, m, depth) \
-   (tree[n].Freq < tree[m].Freq || \
-   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
-
-/* ===========================================================================
- * Restore the heap property by moving down the tree starting at node k,
- * exchanging a node with the smallest of its two sons if necessary, stopping
- * when the heap property is re-established (each father smaller than its
- * two sons).
- */
-local void pqdownheap(s, tree, k)
-    deflate_state *s;
-    ct_data *tree;  /* the tree to restore */
-    int k;               /* node to move down */
-{
-    int v = s->heap[k];
-    int j = k << 1;  /* left son of k */
-    while (j <= s->heap_len) {
-        /* Set j to the smallest of the two sons: */
-        if (j < s->heap_len &&
-            smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
-            j++;
-        }
-        /* Exit if v is smaller than both sons */
-        if (smaller(tree, v, s->heap[j], s->depth)) break;
-
-        /* Exchange v with the smallest son */
-        s->heap[k] = s->heap[j];  k = j;
-
-        /* And continue down the tree, setting j to the left son of k */
-        j <<= 1;
-    }
-    s->heap[k] = v;
-}
-
-/* ===========================================================================
- * Compute the optimal bit lengths for a tree and update the total bit length
- * for the current block.
- * IN assertion: the fields freq and dad are set, heap[heap_max] and
- *    above are the tree nodes sorted by increasing frequency.
- * OUT assertions: the field len is set to the optimal bit length, the
- *     array bl_count contains the frequencies for each bit length.
- *     The length opt_len is updated; static_len is also updated if stree is
- *     not null.
- */
-local void gen_bitlen(s, desc)
-    deflate_state *s;
-    tree_desc *desc;    /* the tree descriptor */
-{
-    ct_data *tree        = desc->dyn_tree;
-    int max_code         = desc->max_code;
-    const ct_data *stree = desc->stat_desc->static_tree;
-    const intf *extra    = desc->stat_desc->extra_bits;
-    int base             = desc->stat_desc->extra_base;
-    int max_length       = desc->stat_desc->max_length;
-    int h;              /* heap index */
-    int n, m;           /* iterate over the tree elements */
-    int bits;           /* bit length */
-    int xbits;          /* extra bits */
-    ush f;              /* frequency */
-    int overflow = 0;   /* number of elements with bit length too large */
-
-    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
-
-    /* In a first pass, compute the optimal bit lengths (which may
-     * overflow in the case of the bit length tree).
-     */
-    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
-
-    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
-        n = s->heap[h];
-        bits = tree[tree[n].Dad].Len + 1;
-        if (bits > max_length) bits = max_length, overflow++;
-        tree[n].Len = (ush)bits;
-        /* We overwrite tree[n].Dad which is no longer needed */
-
-        if (n > max_code) continue; /* not a leaf node */
-
-        s->bl_count[bits]++;
-        xbits = 0;
-        if (n >= base) xbits = extra[n-base];
-        f = tree[n].Freq;
-        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
-        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
-    }
-    if (overflow == 0) return;
-
-    Tracev((stderr,"\nbit length overflow\n"));
-    /* This happens for example on obj2 and pic of the Calgary corpus */
-
-    /* Find the first bit length which could increase: */
-    do {
-        bits = max_length-1;
-        while (s->bl_count[bits] == 0) bits--;
-        s->bl_count[bits]--;      /* move one leaf down the tree */
-        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
-        s->bl_count[max_length]--;
-        /* The brother of the overflow item also moves one step up,
-         * but this does not affect bl_count[max_length]
-         */
-        overflow -= 2;
-    } while (overflow > 0);
-
-    /* Now recompute all bit lengths, scanning in increasing frequency.
-     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
-     * lengths instead of fixing only the wrong ones. This idea is taken
-     * from 'ar' written by Haruhiko Okumura.)
-     */
-    for (bits = max_length; bits != 0; bits--) {
-        n = s->bl_count[bits];
-        while (n != 0) {
-            m = s->heap[--h];
-            if (m > max_code) continue;
-            if ((unsigned) tree[m].Len != (unsigned) bits) {
-                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
-                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
-                tree[m].Len = (ush)bits;
-            }
-            n--;
-        }
-    }
-}
-
-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- *     zero code length.
- */
-local void gen_codes (tree, max_code, bl_count)
-    ct_data *tree;             /* the tree to decorate */
-    int max_code;              /* largest code with non zero frequency */
-    ushf *bl_count;            /* number of codes at each bit length */
-{
-    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-    unsigned code = 0;         /* running code value */
-    int bits;                  /* bit index */
-    int n;                     /* code index */
-
-    /* The distribution counts are first used to generate the code values
-     * without bit reversal.
-     */
-    for (bits = 1; bits <= MAX_BITS; bits++) {
-        code = (code + bl_count[bits-1]) << 1;
-        next_code[bits] = (ush)code;
-    }
-    /* Check that the bit counts in bl_count are consistent. The last code
-     * must be all ones.
-     */
-    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
-            "inconsistent bit counts");
-    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
-    for (n = 0;  n <= max_code; n++) {
-        int len = tree[n].Len;
-        if (len == 0) continue;
-        /* Now reverse the bits */
-        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
-
-        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
-             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
-    }
-}
-
-/* ===========================================================================
- * Construct one Huffman tree and assigns the code bit strings and lengths.
- * Update the total bit length for the current block.
- * IN assertion: the field freq is set for all tree elements.
- * OUT assertions: the fields len and code are set to the optimal bit length
- *     and corresponding code. The length opt_len is updated; static_len is
- *     also updated if stree is not null. The field max_code is set.
- */
-local void build_tree(s, desc)
-    deflate_state *s;
-    tree_desc *desc; /* the tree descriptor */
-{
-    ct_data *tree         = desc->dyn_tree;
-    const ct_data *stree  = desc->stat_desc->static_tree;
-    int elems             = desc->stat_desc->elems;
-    int n, m;          /* iterate over heap elements */
-    int max_code = -1; /* largest code with non zero frequency */
-    int node;          /* new node being created */
-
-    /* Construct the initial heap, with least frequent element in
-     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
-     * heap[0] is not used.
-     */
-    s->heap_len = 0, s->heap_max = HEAP_SIZE;
-
-    for (n = 0; n < elems; n++) {
-        if (tree[n].Freq != 0) {
-            s->heap[++(s->heap_len)] = max_code = n;
-            s->depth[n] = 0;
-        } else {
-            tree[n].Len = 0;
-        }
-    }
-
-    /* The pkzip format requires that at least one distance code exists,
-     * and that at least one bit should be sent even if there is only one
-     * possible code. So to avoid special checks later on we force at least
-     * two codes of non zero frequency.
-     */
-    while (s->heap_len < 2) {
-        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
-        tree[node].Freq = 1;
-        s->depth[node] = 0;
-        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
-        /* node is 0 or 1 so it does not have extra bits */
-    }
-    desc->max_code = max_code;
-
-    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
-     * establish sub-heaps of increasing lengths:
-     */
-    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
-
-    /* Construct the Huffman tree by repeatedly combining the least two
-     * frequent nodes.
-     */
-    node = elems;              /* next internal node of the tree */
-    do {
-        pqremove(s, tree, n);  /* n = node of least frequency */
-        m = s->heap[SMALLEST]; /* m = node of next least frequency */
-
-        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
-        s->heap[--(s->heap_max)] = m;
-
-        /* Create a new node father of n and m */
-        tree[node].Freq = tree[n].Freq + tree[m].Freq;
-        s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
-                                s->depth[n] : s->depth[m]) + 1);
-        tree[n].Dad = tree[m].Dad = (ush)node;
-#ifdef DUMP_BL_TREE
-        if (tree == s->bl_tree) {
-            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
-                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
-        }
-#endif
-        /* and insert the new node in the heap */
-        s->heap[SMALLEST] = node++;
-        pqdownheap(s, tree, SMALLEST);
-
-    } while (s->heap_len >= 2);
-
-    s->heap[--(s->heap_max)] = s->heap[SMALLEST];
-
-    /* At this point, the fields freq and dad are set. We can now
-     * generate the bit lengths.
-     */
-    gen_bitlen(s, (tree_desc *)desc);
-
-    /* The field len is now set, we can generate the bit codes */
-    gen_codes ((ct_data *)tree, max_code, s->bl_count);
-}
-
-/* ===========================================================================
- * Scan a literal or distance tree to determine the frequencies of the codes
- * in the bit length tree.
- */
-local void scan_tree (s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree;   /* the tree to be scanned */
-    int max_code;    /* and its largest code of non zero frequency */
-{
-    int n;                     /* iterates over all tree elements */
-    int prevlen = -1;          /* last emitted length */
-    int curlen;                /* length of current code */
-    int nextlen = tree[0].Len; /* length of next code */
-    int count = 0;             /* repeat count of the current code */
-    int max_count = 7;         /* max repeat count */
-    int min_count = 4;         /* min repeat count */
-
-    if (nextlen == 0) max_count = 138, min_count = 3;
-    tree[max_code+1].Len = (ush)0xffff; /* guard */
-
-    for (n = 0; n <= max_code; n++) {
-        curlen = nextlen; nextlen = tree[n+1].Len;
-        if (++count < max_count && curlen == nextlen) {
-            continue;
-        } else if (count < min_count) {
-            s->bl_tree[curlen].Freq += count;
-        } else if (curlen != 0) {
-            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
-            s->bl_tree[REP_3_6].Freq++;
-        } else if (count <= 10) {
-            s->bl_tree[REPZ_3_10].Freq++;
-        } else {
-            s->bl_tree[REPZ_11_138].Freq++;
-        }
-        count = 0; prevlen = curlen;
-        if (nextlen == 0) {
-            max_count = 138, min_count = 3;
-        } else if (curlen == nextlen) {
-            max_count = 6, min_count = 3;
-        } else {
-            max_count = 7, min_count = 4;
-        }
-    }
-}
-
-/* ===========================================================================
- * Send a literal or distance tree in compressed form, using the codes in
- * bl_tree.
- */
-local void send_tree (s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree; /* the tree to be scanned */
-    int max_code;       /* and its largest code of non zero frequency */
-{
-    int n;                     /* iterates over all tree elements */
-    int prevlen = -1;          /* last emitted length */
-    int curlen;                /* length of current code */
-    int nextlen = tree[0].Len; /* length of next code */
-    int count = 0;             /* repeat count of the current code */
-    int max_count = 7;         /* max repeat count */
-    int min_count = 4;         /* min repeat count */
-
-    /* tree[max_code+1].Len = -1; */  /* guard already set */
-    if (nextlen == 0) max_count = 138, min_count = 3;
-
-    for (n = 0; n <= max_code; n++) {
-        curlen = nextlen; nextlen = tree[n+1].Len;
-        if (++count < max_count && curlen == nextlen) {
-            continue;
-        } else if (count < min_count) {
-            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
-
-        } else if (curlen != 0) {
-            if (curlen != prevlen) {
-                send_code(s, curlen, s->bl_tree); count--;
-            }
-            Assert(count >= 3 && count <= 6, " 3_6?");
-            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
-
-        } else if (count <= 10) {
-            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
-
-        } else {
-            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
-        }
-        count = 0; prevlen = curlen;
-        if (nextlen == 0) {
-            max_count = 138, min_count = 3;
-        } else if (curlen == nextlen) {
-            max_count = 6, min_count = 3;
-        } else {
-            max_count = 7, min_count = 4;
-        }
-    }
-}
-
-/* ===========================================================================
- * Construct the Huffman tree for the bit lengths and return the index in
- * bl_order of the last bit length code to send.
- */
-local int build_bl_tree(s)
-    deflate_state *s;
-{
-    int max_blindex;  /* index of last bit length code of non zero freq */
-
-    /* Determine the bit length frequencies for literal and distance trees */
-    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
-    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
-
-    /* Build the bit length tree: */
-    build_tree(s, (tree_desc *)(&(s->bl_desc)));
-    /* opt_len now includes the length of the tree representations, except
-     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
-     */
-
-    /* Determine the number of bit length codes to send. The pkzip format
-     * requires that at least 4 bit length codes be sent. (appnote.txt says
-     * 3 but the actual value used is 4.)
-     */
-    for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
-        if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
-    }
-    /* Update opt_len to include the bit length tree and counts */
-    s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4;
-    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
-            s->opt_len, s->static_len));
-
-    return max_blindex;
-}
-
-/* ===========================================================================
- * Send the header for a block using dynamic Huffman trees: the counts, the
- * lengths of the bit length codes, the literal tree and the distance tree.
- * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
- */
-local void send_all_trees(s, lcodes, dcodes, blcodes)
-    deflate_state *s;
-    int lcodes, dcodes, blcodes; /* number of codes for each tree */
-{
-    int rank;                    /* index in bl_order */
-
-    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
-    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
-            "too many codes");
-    Tracev((stderr, "\nbl counts: "));
-    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
-    send_bits(s, dcodes-1,   5);
-    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
-    for (rank = 0; rank < blcodes; rank++) {
-        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
-        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
-    }
-    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
-
-    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
-    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
-
-    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
-    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
-}
-
-/* ===========================================================================
- * Send a stored block
- */
-void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
-    send_bits(s, (STORED_BLOCK<<1)+last, 3);    /* send block type */
-    bi_windup(s);        /* align on byte boundary */
-    put_short(s, (ush)stored_len);
-    put_short(s, (ush)~stored_len);
-    zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
-    s->pending += stored_len;
-#ifdef ZLIB_DEBUG
-    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
-    s->compressed_len += (stored_len + 4) << 3;
-    s->bits_sent += 2*16;
-    s->bits_sent += stored_len<<3;
-#endif
-}
-
-/* ===========================================================================
- * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
- */
-void ZLIB_INTERNAL _tr_flush_bits(s)
-    deflate_state *s;
-{
-    bi_flush(s);
-}
-
-/* ===========================================================================
- * Send one empty static block to give enough lookahead for inflate.
- * This takes 10 bits, of which 7 may remain in the bit buffer.
- */
-void ZLIB_INTERNAL _tr_align(s)
-    deflate_state *s;
-{
-    send_bits(s, STATIC_TREES<<1, 3);
-    send_code(s, END_BLOCK, static_ltree);
-#ifdef ZLIB_DEBUG
-    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
-#endif
-    bi_flush(s);
-}
-
-/* ===========================================================================
- * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and write out the encoded block.
- */
-void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block, or NULL if too old */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
-    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
-    int max_blindex = 0;  /* index of last bit length code of non zero freq */
-
-    /* Build the Huffman trees unless a stored block is forced */
-    if (s->level > 0) {
-
-        /* Check if the file is binary or text */
-        if (s->strm->data_type == Z_UNKNOWN)
-            s->strm->data_type = detect_data_type(s);
-
-        /* Construct the literal and distance trees */
-        build_tree(s, (tree_desc *)(&(s->l_desc)));
-        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
-                s->static_len));
-
-        build_tree(s, (tree_desc *)(&(s->d_desc)));
-        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
-                s->static_len));
-        /* At this point, opt_len and static_len are the total bit lengths of
-         * the compressed block data, excluding the tree representations.
-         */
-
-        /* Build the bit length tree for the above two trees, and get the index
-         * in bl_order of the last bit length code to send.
-         */
-        max_blindex = build_bl_tree(s);
-
-        /* Determine the best encoding. Compute the block lengths in bytes. */
-        opt_lenb = (s->opt_len+3+7)>>3;
-        static_lenb = (s->static_len+3+7)>>3;
-
-        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
-                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
-                s->last_lit));
-
-        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
-
-    } else {
-        Assert(buf != (char*)0, "lost buf");
-        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
-    }
-
-#ifdef FORCE_STORED
-    if (buf != (char*)0) { /* force stored block */
-#else
-    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
-                       /* 4: two words for the lengths */
-#endif
-        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
-         * Otherwise we can't have processed more than WSIZE input bytes since
-         * the last block flush, because compression would have been
-         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
-         * transform a block into a stored block.
-         */
-        _tr_stored_block(s, buf, stored_len, last);
-
-#ifdef FORCE_STATIC
-    } else if (static_lenb >= 0) { /* force static trees */
-#else
-    } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
-#endif
-        send_bits(s, (STATIC_TREES<<1)+last, 3);
-        compress_block(s, (const ct_data *)static_ltree,
-                       (const ct_data *)static_dtree);
-#ifdef ZLIB_DEBUG
-        s->compressed_len += 3 + s->static_len;
-#endif
-    } else {
-        send_bits(s, (DYN_TREES<<1)+last, 3);
-        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
-                       max_blindex+1);
-        compress_block(s, (const ct_data *)s->dyn_ltree,
-                       (const ct_data *)s->dyn_dtree);
-#ifdef ZLIB_DEBUG
-        s->compressed_len += 3 + s->opt_len;
-#endif
-    }
-    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
-    /* The above check is made mod 2^32, for files larger than 512 MB
-     * and uLong implemented on 32 bits.
-     */
-    init_block(s);
-
-    if (last) {
-        bi_windup(s);
-#ifdef ZLIB_DEBUG
-        s->compressed_len += 7;  /* align on byte boundary */
-#endif
-    }
-    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
-           s->compressed_len-7*last));
-}
-
-/* ===========================================================================
- * Save the match info and tally the frequency counts. Return true if
- * the current block must be flushed.
- */
-int ZLIB_INTERNAL _tr_tally (s, dist, lc)
-    deflate_state *s;
-    unsigned dist;  /* distance of matched string */
-    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
-{
-    s->d_buf[s->last_lit] = (ush)dist;
-    s->l_buf[s->last_lit++] = (uch)lc;
-    if (dist == 0) {
-        /* lc is the unmatched char */
-        s->dyn_ltree[lc].Freq++;
-    } else {
-        s->matches++;
-        /* Here, lc is the match length - MIN_MATCH */
-        dist--;             /* dist = match distance - 1 */
-        Assert((ush)dist < (ush)MAX_DIST(s) &&
-               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
-               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
-
-        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
-        s->dyn_dtree[d_code(dist)].Freq++;
-    }
-
-#ifdef TRUNCATE_BLOCK
-    /* Try to guess if it is profitable to stop the current block here */
-    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
-        /* Compute an upper bound for the compressed length */
-        ulg out_length = (ulg)s->last_lit*8L;
-        ulg in_length = (ulg)((long)s->strstart - s->block_start);
-        int dcode;
-        for (dcode = 0; dcode < D_CODES; dcode++) {
-            out_length += (ulg)s->dyn_dtree[dcode].Freq *
-                (5L+extra_dbits[dcode]);
-        }
-        out_length >>= 3;
-        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
-               s->last_lit, in_length, out_length,
-               100L - out_length*100L/in_length));
-        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
-    }
-#endif
-    return (s->last_lit == s->lit_bufsize-1);
-    /* We avoid equality with lit_bufsize because of wraparound at 64K
-     * on 16 bit machines and because stored blocks are restricted to
-     * 64K-1 bytes.
-     */
-}
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-local void compress_block(s, ltree, dtree)
-    deflate_state *s;
-    const ct_data *ltree; /* literal tree */
-    const ct_data *dtree; /* distance tree */
-{
-    unsigned dist;      /* distance of matched string */
-    int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned lx = 0;    /* running index in l_buf */
-    unsigned code;      /* the code to send */
-    int extra;          /* number of extra bits to send */
-
-    if (s->last_lit != 0) do {
-        dist = s->d_buf[lx];
-        lc = s->l_buf[lx++];
-        if (dist == 0) {
-            send_code(s, lc, ltree); /* send a literal byte */
-            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-        } else {
-            /* Here, lc is the match length - MIN_MATCH */
-            code = _length_code[lc];
-            send_code(s, code+LITERALS+1, ltree); /* send the length code */
-            extra = extra_lbits[code];
-            if (extra != 0) {
-                lc -= base_length[code];
-                send_bits(s, lc, extra);       /* send the extra length bits */
-            }
-            dist--; /* dist is now the match distance - 1 */
-            code = d_code(dist);
-            Assert (code < D_CODES, "bad d_code");
-
-            send_code(s, code, dtree);       /* send the distance code */
-            extra = extra_dbits[code];
-            if (extra != 0) {
-                dist -= (unsigned)base_dist[code];
-                send_bits(s, dist, extra);   /* send the extra distance bits */
-            }
-        } /* literal or match pair ? */
-
-        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
-        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
-               "pendingBuf overflow");
-
-    } while (lx < s->last_lit);
-
-    send_code(s, END_BLOCK, ltree);
-}
-
-/* ===========================================================================
- * Check if the data type is TEXT or BINARY, using the following algorithm:
- * - TEXT if the two conditions below are satisfied:
- *    a) There are no non-portable control characters belonging to the
- *       "black list" (0..6, 14..25, 28..31).
- *    b) There is at least one printable character belonging to the
- *       "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
- * - BINARY otherwise.
- * - The following partially-portable control characters form a
- *   "gray list" that is ignored in this detection algorithm:
- *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
- * IN assertion: the fields Freq of dyn_ltree are set.
- */
-local int detect_data_type(s)
-    deflate_state *s;
-{
-    /* black_mask is the bit mask of black-listed bytes
-     * set bits 0..6, 14..25, and 28..31
-     * 0xf3ffc07f = binary 11110011111111111100000001111111
-     */
-    unsigned long black_mask = 0xf3ffc07fUL;
-    int n;
-
-    /* Check for non-textual ("black-listed") bytes. */
-    for (n = 0; n <= 31; n++, black_mask >>= 1)
-        if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
-            return Z_BINARY;
-
-    /* Check for textual ("white-listed") bytes. */
-    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
-            || s->dyn_ltree[13].Freq != 0)
-        return Z_TEXT;
-    for (n = 32; n < LITERALS; n++)
-        if (s->dyn_ltree[n].Freq != 0)
-            return Z_TEXT;
-
-    /* There are no "black-listed" or "white-listed" bytes:
-     * this stream either is empty or has tolerated ("gray-listed") bytes only.
-     */
-    return Z_BINARY;
-}
-
-/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-local unsigned bi_reverse(code, len)
-    unsigned code; /* the value to invert */
-    int len;       /* its bit length */
-{
-    register unsigned res = 0;
-    do {
-        res |= code & 1;
-        code >>= 1, res <<= 1;
-    } while (--len > 0);
-    return res >> 1;
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-local void bi_flush(s)
-    deflate_state *s;
-{
-    if (s->bi_valid == 16) {
-        put_short(s, s->bi_buf);
-        s->bi_buf = 0;
-        s->bi_valid = 0;
-    } else if (s->bi_valid >= 8) {
-        put_byte(s, (Byte)s->bi_buf);
-        s->bi_buf >>= 8;
-        s->bi_valid -= 8;
-    }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-local void bi_windup(s)
-    deflate_state *s;
-{
-    if (s->bi_valid > 8) {
-        put_short(s, s->bi_buf);
-    } else if (s->bi_valid > 0) {
-        put_byte(s, (Byte)s->bi_buf);
-    }
-    s->bi_buf = 0;
-    s->bi_valid = 0;
-#ifdef ZLIB_DEBUG
-    s->bits_sent = (s->bits_sent+7) & ~7;
-#endif
-}
diff --git a/dep/zlib/src/trees.h b/dep/zlib/src/trees.h
deleted file mode 100644
index d35639d82..000000000
--- a/dep/zlib/src/trees.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/* header created automatically with -DGEN_TREES_H */
-
-local const ct_data static_ltree[L_CODES+2] = {
-{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
-{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
-{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
-{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
-{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
-{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
-{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
-{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
-{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
-{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
-{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
-{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
-{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
-{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
-{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
-{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
-{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
-{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
-{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
-{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
-{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
-{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
-{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
-{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
-{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
-{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
-{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
-{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
-{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
-{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
-{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
-{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
-{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
-{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
-{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
-{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
-{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
-{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
-{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
-{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
-{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
-{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
-{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
-{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
-{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
-{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
-{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
-{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
-{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
-{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
-{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
-{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
-{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
-{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
-{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
-{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
-{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
-{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
-};
-
-local const ct_data static_dtree[D_CODES] = {
-{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
-{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
-{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
-{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
-{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
-{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
-};
-
-const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {
- 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
- 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
-10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
-13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
-15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
-18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
-23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
-27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
-};
-
-const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {
- 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
-13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
-17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
-19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
-22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
-23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
-26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
-};
-
-local const int base_length[LENGTH_CODES] = {
-0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
-64, 80, 96, 112, 128, 160, 192, 224, 0
-};
-
-local const int base_dist[D_CODES] = {
-    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
-   32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
- 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
-};
-
diff --git a/dep/zlib/src/uncompr.c b/dep/zlib/src/uncompr.c
deleted file mode 100644
index f03a1a865..000000000
--- a/dep/zlib/src/uncompr.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/* uncompr.c -- decompress a memory buffer
- * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#define ZLIB_INTERNAL
-#include "zlib.h"
-
-/* ===========================================================================
-     Decompresses the source buffer into the destination buffer.  *sourceLen is
-   the byte length of the source buffer. Upon entry, *destLen is the total size
-   of the destination buffer, which must be large enough to hold the entire
-   uncompressed data. (The size of the uncompressed data must have been saved
-   previously by the compressor and transmitted to the decompressor by some
-   mechanism outside the scope of this compression library.) Upon exit,
-   *destLen is the size of the decompressed data and *sourceLen is the number
-   of source bytes consumed. Upon return, source + *sourceLen points to the
-   first unused input byte.
-
-     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
-   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
-   Z_DATA_ERROR if the input data was corrupted, including if the input data is
-   an incomplete zlib stream.
-*/
-int ZEXPORT uncompress2 (dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong *sourceLen;
-{
-    z_stream stream;
-    int err;
-    const uInt max = (uInt)-1;
-    uLong len, left;
-    Byte buf[1];    /* for detection of incomplete stream when *destLen == 0 */
-
-    len = *sourceLen;
-    if (*destLen) {
-        left = *destLen;
-        *destLen = 0;
-    }
-    else {
-        left = 1;
-        dest = buf;
-    }
-
-    stream.next_in = (z_const Bytef *)source;
-    stream.avail_in = 0;
-    stream.zalloc = (alloc_func)0;
-    stream.zfree = (free_func)0;
-    stream.opaque = (voidpf)0;
-
-    err = inflateInit(&stream);
-    if (err != Z_OK) return err;
-
-    stream.next_out = dest;
-    stream.avail_out = 0;
-
-    do {
-        if (stream.avail_out == 0) {
-            stream.avail_out = left > (uLong)max ? max : (uInt)left;
-            left -= stream.avail_out;
-        }
-        if (stream.avail_in == 0) {
-            stream.avail_in = len > (uLong)max ? max : (uInt)len;
-            len -= stream.avail_in;
-        }
-        err = inflate(&stream, Z_NO_FLUSH);
-    } while (err == Z_OK);
-
-    *sourceLen -= len + stream.avail_in;
-    if (dest != buf)
-        *destLen = stream.total_out;
-    else if (stream.total_out && err == Z_BUF_ERROR)
-        left = 1;
-
-    inflateEnd(&stream);
-    return err == Z_STREAM_END ? Z_OK :
-           err == Z_NEED_DICT ? Z_DATA_ERROR  :
-           err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR :
-           err;
-}
-
-int ZEXPORT uncompress (dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
-    return uncompress2(dest, destLen, source, &sourceLen);
-}
diff --git a/dep/zlib/src/zutil.c b/dep/zlib/src/zutil.c
deleted file mode 100644
index a76c6b0c7..000000000
--- a/dep/zlib/src/zutil.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/* zutil.c -- target dependent utility functions for the compression library
- * Copyright (C) 1995-2017 Jean-loup Gailly
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#include "zutil.h"
-#ifndef Z_SOLO
-#  include "gzguts.h"
-#endif
-
-z_const char * const z_errmsg[10] = {
-    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
-    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
-    (z_const char *)"",                    /* Z_OK              0  */
-    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
-    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
-    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
-    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
-    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
-    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
-    (z_const char *)""
-};
-
-
-const char * ZEXPORT zlibVersion()
-{
-    return ZLIB_VERSION;
-}
-
-uLong ZEXPORT zlibCompileFlags()
-{
-    uLong flags;
-
-    flags = 0;
-    switch ((int)(sizeof(uInt))) {
-    case 2:     break;
-    case 4:     flags += 1;     break;
-    case 8:     flags += 2;     break;
-    default:    flags += 3;
-    }
-    switch ((int)(sizeof(uLong))) {
-    case 2:     break;
-    case 4:     flags += 1 << 2;        break;
-    case 8:     flags += 2 << 2;        break;
-    default:    flags += 3 << 2;
-    }
-    switch ((int)(sizeof(voidpf))) {
-    case 2:     break;
-    case 4:     flags += 1 << 4;        break;
-    case 8:     flags += 2 << 4;        break;
-    default:    flags += 3 << 4;
-    }
-    switch ((int)(sizeof(z_off_t))) {
-    case 2:     break;
-    case 4:     flags += 1 << 6;        break;
-    case 8:     flags += 2 << 6;        break;
-    default:    flags += 3 << 6;
-    }
-#ifdef ZLIB_DEBUG
-    flags += 1 << 8;
-#endif
-#if defined(ASMV) || defined(ASMINF)
-    flags += 1 << 9;
-#endif
-#ifdef ZLIB_WINAPI
-    flags += 1 << 10;
-#endif
-#ifdef BUILDFIXED
-    flags += 1 << 12;
-#endif
-#ifdef DYNAMIC_CRC_TABLE
-    flags += 1 << 13;
-#endif
-#ifdef NO_GZCOMPRESS
-    flags += 1L << 16;
-#endif
-#ifdef NO_GZIP
-    flags += 1L << 17;
-#endif
-#ifdef PKZIP_BUG_WORKAROUND
-    flags += 1L << 20;
-#endif
-#ifdef FASTEST
-    flags += 1L << 21;
-#endif
-#if defined(STDC) || defined(Z_HAVE_STDARG_H)
-#  ifdef NO_vsnprintf
-    flags += 1L << 25;
-#    ifdef HAS_vsprintf_void
-    flags += 1L << 26;
-#    endif
-#  else
-#    ifdef HAS_vsnprintf_void
-    flags += 1L << 26;
-#    endif
-#  endif
-#else
-    flags += 1L << 24;
-#  ifdef NO_snprintf
-    flags += 1L << 25;
-#    ifdef HAS_sprintf_void
-    flags += 1L << 26;
-#    endif
-#  else
-#    ifdef HAS_snprintf_void
-    flags += 1L << 26;
-#    endif
-#  endif
-#endif
-    return flags;
-}
-
-#ifdef ZLIB_DEBUG
-#include <stdlib.h>
-#  ifndef verbose
-#    define verbose 0
-#  endif
-int ZLIB_INTERNAL z_verbose = verbose;
-
-void ZLIB_INTERNAL z_error (m)
-    char *m;
-{
-    fprintf(stderr, "%s\n", m);
-    exit(1);
-}
-#endif
-
-/* exported to allow conversion of error code to string for compress() and
- * uncompress()
- */
-const char * ZEXPORT zError(err)
-    int err;
-{
-    return ERR_MSG(err);
-}
-
-#if defined(_WIN32_WCE)
-    /* The Microsoft C Run-Time Library for Windows CE doesn't have
-     * errno.  We define it as a global variable to simplify porting.
-     * Its value is always 0 and should not be used.
-     */
-    int errno = 0;
-#endif
-
-#ifndef HAVE_MEMCPY
-
-void ZLIB_INTERNAL zmemcpy(dest, source, len)
-    Bytef* dest;
-    const Bytef* source;
-    uInt  len;
-{
-    if (len == 0) return;
-    do {
-        *dest++ = *source++; /* ??? to be unrolled */
-    } while (--len != 0);
-}
-
-int ZLIB_INTERNAL zmemcmp(s1, s2, len)
-    const Bytef* s1;
-    const Bytef* s2;
-    uInt  len;
-{
-    uInt j;
-
-    for (j = 0; j < len; j++) {
-        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
-    }
-    return 0;
-}
-
-void ZLIB_INTERNAL zmemzero(dest, len)
-    Bytef* dest;
-    uInt  len;
-{
-    if (len == 0) return;
-    do {
-        *dest++ = 0;  /* ??? to be unrolled */
-    } while (--len != 0);
-}
-#endif
-
-#ifndef Z_SOLO
-
-#ifdef SYS16BIT
-
-#ifdef __TURBOC__
-/* Turbo C in 16-bit mode */
-
-#  define MY_ZCALLOC
-
-/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
- * and farmalloc(64K) returns a pointer with an offset of 8, so we
- * must fix the pointer. Warning: the pointer must be put back to its
- * original form in order to free it, use zcfree().
- */
-
-#define MAX_PTR 10
-/* 10*64K = 640K */
-
-local int next_ptr = 0;
-
-typedef struct ptr_table_s {
-    voidpf org_ptr;
-    voidpf new_ptr;
-} ptr_table;
-
-local ptr_table table[MAX_PTR];
-/* This table is used to remember the original form of pointers
- * to large buffers (64K). Such pointers are normalized with a zero offset.
- * Since MSDOS is not a preemptive multitasking OS, this table is not
- * protected from concurrent access. This hack doesn't work anyway on
- * a protected system like OS/2. Use Microsoft C instead.
- */
-
-voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size)
-{
-    voidpf buf;
-    ulg bsize = (ulg)items*size;
-
-    (void)opaque;
-
-    /* If we allocate less than 65520 bytes, we assume that farmalloc
-     * will return a usable pointer which doesn't have to be normalized.
-     */
-    if (bsize < 65520L) {
-        buf = farmalloc(bsize);
-        if (*(ush*)&buf != 0) return buf;
-    } else {
-        buf = farmalloc(bsize + 16L);
-    }
-    if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
-    table[next_ptr].org_ptr = buf;
-
-    /* Normalize the pointer to seg:0 */
-    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
-    *(ush*)&buf = 0;
-    table[next_ptr++].new_ptr = buf;
-    return buf;
-}
-
-void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
-{
-    int n;
-
-    (void)opaque;
-
-    if (*(ush*)&ptr != 0) { /* object < 64K */
-        farfree(ptr);
-        return;
-    }
-    /* Find the original pointer */
-    for (n = 0; n < next_ptr; n++) {
-        if (ptr != table[n].new_ptr) continue;
-
-        farfree(table[n].org_ptr);
-        while (++n < next_ptr) {
-            table[n-1] = table[n];
-        }
-        next_ptr--;
-        return;
-    }
-    Assert(0, "zcfree: ptr not found");
-}
-
-#endif /* __TURBOC__ */
-
-
-#ifdef M_I86
-/* Microsoft C in 16-bit mode */
-
-#  define MY_ZCALLOC
-
-#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
-#  define _halloc  halloc
-#  define _hfree   hfree
-#endif
-
-voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size)
-{
-    (void)opaque;
-    return _halloc((long)items, size);
-}
-
-void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
-{
-    (void)opaque;
-    _hfree(ptr);
-}
-
-#endif /* M_I86 */
-
-#endif /* SYS16BIT */
-
-
-#ifndef MY_ZCALLOC /* Any system without a special alloc function */
-
-#ifndef STDC
-extern voidp  malloc OF((uInt size));
-extern voidp  calloc OF((uInt items, uInt size));
-extern void   free   OF((voidpf ptr));
-#endif
-
-voidpf ZLIB_INTERNAL zcalloc (opaque, items, size)
-    voidpf opaque;
-    unsigned items;
-    unsigned size;
-{
-    (void)opaque;
-    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
-                              (voidpf)calloc(items, size);
-}
-
-void ZLIB_INTERNAL zcfree (opaque, ptr)
-    voidpf opaque;
-    voidpf ptr;
-{
-    (void)opaque;
-    free(ptr);
-}
-
-#endif /* MY_ZCALLOC */
-
-#endif /* !Z_SOLO */
diff --git a/dep/zlib/src/zutil.h b/dep/zlib/src/zutil.h
deleted file mode 100644
index b079ea6a8..000000000
--- a/dep/zlib/src/zutil.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-/* @(#) $Id$ */
-
-#ifndef ZUTIL_H
-#define ZUTIL_H
-
-#ifdef HAVE_HIDDEN
-#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
-#else
-#  define ZLIB_INTERNAL
-#endif
-
-#include "zlib.h"
-
-#if defined(STDC) && !defined(Z_SOLO)
-#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
-#    include <stddef.h>
-#  endif
-#  include <string.h>
-#  include <stdlib.h>
-#endif
-
-#ifdef Z_SOLO
-   typedef long ptrdiff_t;  /* guess -- will be caught if guess is wrong */
-#endif
-
-#ifndef local
-#  define local static
-#endif
-/* since "static" is used to mean two completely different things in C, we
-   define "local" for the non-static meaning of "static", for readability
-   (compile with -Dlocal if your debugger can't find static symbols) */
-
-typedef unsigned char  uch;
-typedef uch FAR uchf;
-typedef unsigned short ush;
-typedef ush FAR ushf;
-typedef unsigned long  ulg;
-
-extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
-/* (size given to avoid silly warnings with Visual C++) */
-
-#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
-
-#define ERR_RETURN(strm,err) \
-  return (strm->msg = ERR_MSG(err), (err))
-/* To be used only when the state is known to be valid */
-
-        /* common constants */
-
-#ifndef DEF_WBITS
-#  define DEF_WBITS MAX_WBITS
-#endif
-/* default windowBits for decompression. MAX_WBITS is for compression only */
-
-#if MAX_MEM_LEVEL >= 8
-#  define DEF_MEM_LEVEL 8
-#else
-#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
-#endif
-/* default memLevel */
-
-#define STORED_BLOCK 0
-#define STATIC_TREES 1
-#define DYN_TREES    2
-/* The three kinds of block type */
-
-#define MIN_MATCH  3
-#define MAX_MATCH  258
-/* The minimum and maximum match lengths */
-
-#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
-
-        /* target dependencies */
-
-#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
-#  define OS_CODE  0x00
-#  ifndef Z_SOLO
-#    if defined(__TURBOC__) || defined(__BORLANDC__)
-#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
-         /* Allow compilation with ANSI keywords only enabled */
-         void _Cdecl farfree( void *block );
-         void *_Cdecl farmalloc( unsigned long nbytes );
-#      else
-#        include <alloc.h>
-#      endif
-#    else /* MSC or DJGPP */
-#      include <malloc.h>
-#    endif
-#  endif
-#endif
-
-#ifdef AMIGA
-#  define OS_CODE  1
-#endif
-
-#if defined(VAXC) || defined(VMS)
-#  define OS_CODE  2
-#  define F_OPEN(name, mode) \
-     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
-#endif
-
-#ifdef __370__
-#  if __TARGET_LIB__ < 0x20000000
-#    define OS_CODE 4
-#  elif __TARGET_LIB__ < 0x40000000
-#    define OS_CODE 11
-#  else
-#    define OS_CODE 8
-#  endif
-#endif
-
-#if defined(ATARI) || defined(atarist)
-#  define OS_CODE  5
-#endif
-
-#ifdef OS2
-#  define OS_CODE  6
-#  if defined(M_I86) && !defined(Z_SOLO)
-#    include <malloc.h>
-#  endif
-#endif
-
-#if defined(MACOS) || defined(TARGET_OS_MAC)
-#  define OS_CODE  7
-#  ifndef Z_SOLO
-#    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-#      include <unix.h> /* for fdopen */
-#    else
-#      ifndef fdopen
-#        define fdopen(fd,mode) NULL /* No fdopen() */
-#      endif
-#    endif
-#  endif
-#endif
-
-#ifdef __acorn
-#  define OS_CODE 13
-#endif
-
-#if defined(WIN32) && !defined(__CYGWIN__)
-#  define OS_CODE  10
-#endif
-
-#ifdef _BEOS_
-#  define OS_CODE  16
-#endif
-
-#ifdef __TOS_OS400__
-#  define OS_CODE 18
-#endif
-
-#ifdef __APPLE__
-#  define OS_CODE 19
-#endif
-
-#if defined(_BEOS_) || defined(RISCOS)
-#  define fdopen(fd,mode) NULL /* No fdopen() */
-#endif
-
-#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
-#  if defined(_WIN32_WCE)
-#    define fdopen(fd,mode) NULL /* No fdopen() */
-#    ifndef _PTRDIFF_T_DEFINED
-       typedef int ptrdiff_t;
-#      define _PTRDIFF_T_DEFINED
-#    endif
-#  else
-#    define fdopen(fd,type)  _fdopen(fd,type)
-#  endif
-#endif
-
-#if defined(__BORLANDC__) && !defined(MSDOS)
-  #pragma warn -8004
-  #pragma warn -8008
-  #pragma warn -8066
-#endif
-
-/* provide prototypes for these when building zlib without LFS */
-#if !defined(_WIN32) && \
-    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
-    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-#endif
-
-        /* common defaults */
-
-#ifndef OS_CODE
-#  define OS_CODE  3     /* assume Unix */
-#endif
-
-#ifndef F_OPEN
-#  define F_OPEN(name, mode) fopen((name), (mode))
-#endif
-
-         /* functions */
-
-#if defined(pyr) || defined(Z_SOLO)
-#  define NO_MEMCPY
-#endif
-#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
- /* Use our own functions for small and medium model with MSC <= 5.0.
-  * You may have to use the same strategy for Borland C (untested).
-  * The __SC__ check is for Symantec.
-  */
-#  define NO_MEMCPY
-#endif
-#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
-#  define HAVE_MEMCPY
-#endif
-#ifdef HAVE_MEMCPY
-#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
-#    define zmemcpy _fmemcpy
-#    define zmemcmp _fmemcmp
-#    define zmemzero(dest, len) _fmemset(dest, 0, len)
-#  else
-#    define zmemcpy memcpy
-#    define zmemcmp memcmp
-#    define zmemzero(dest, len) memset(dest, 0, len)
-#  endif
-#else
-   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
-   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
-   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
-#endif
-
-/* Diagnostic functions */
-#ifdef ZLIB_DEBUG
-#  include <stdio.h>
-   extern int ZLIB_INTERNAL z_verbose;
-   extern void ZLIB_INTERNAL z_error OF((char *m));
-#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
-#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
-#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
-#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
-#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
-#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
-#else
-#  define Assert(cond,msg)
-#  define Trace(x)
-#  define Tracev(x)
-#  define Tracevv(x)
-#  define Tracec(c,x)
-#  define Tracecv(c,x)
-#endif
-
-#ifndef Z_SOLO
-   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
-                                    unsigned size));
-   void ZLIB_INTERNAL zcfree  OF((voidpf opaque, voidpf ptr));
-#endif
-
-#define ZALLOC(strm, items, size) \
-           (*((strm)->zalloc))((strm)->opaque, (items), (size))
-#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
-#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
-
-/* Reverse the bytes in a 32-bit value */
-#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
-                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
-
-#endif /* ZUTIL_H */
diff --git a/dep/zlib/zlib.vcxproj b/dep/zlib/zlib.vcxproj
deleted file mode 100644
index 1bfd57f46..000000000
--- a/dep/zlib/zlib.vcxproj
+++ /dev/null
@@ -1,50 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <Import Project="..\msvc\vsprops\Configurations.props" />
-
-  <ItemGroup>
-    <ClInclude Include="include\zconf.h" />
-    <ClInclude Include="include\zlib.h" />
-    <ClInclude Include="src\crc32.h" />
-    <ClInclude Include="src\deflate.h" />
-    <ClInclude Include="src\gzguts.h" />
-    <ClInclude Include="src\inffast.h" />
-    <ClInclude Include="src\inffixed.h" />
-    <ClInclude Include="src\inflate.h" />
-    <ClInclude Include="src\inftrees.h" />
-    <ClInclude Include="src\trees.h" />
-    <ClInclude Include="src\zutil.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\adler32.c" />
-    <ClCompile Include="src\compress.c" />
-    <ClCompile Include="src\crc32.c" />
-    <ClCompile Include="src\deflate.c" />
-    <ClCompile Include="src\gzclose.c" />
-    <ClCompile Include="src\gzlib.c" />
-    <ClCompile Include="src\gzread.c" />
-    <ClCompile Include="src\gzwrite.c" />
-    <ClCompile Include="src\infback.c" />
-    <ClCompile Include="src\inffast.c" />
-    <ClCompile Include="src\inflate.c" />
-    <ClCompile Include="src\inftrees.c" />
-    <ClCompile Include="src\trees.c" />
-    <ClCompile Include="src\uncompr.c" />
-    <ClCompile Include="src\zutil.c" />
-  </ItemGroup>
-
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{7FF9FDB9-D504-47DB-A16A-B08071999620}</ProjectGuid>
-  </PropertyGroup>
-
-  <Import Project="..\msvc\vsprops\StaticLibrary.props" />
-
-  <ItemDefinitionGroup>
-    <ClCompile>
-      <WarningLevel>TurnOffAllWarnings</WarningLevel>
-      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-  </ItemDefinitionGroup>
-
-  <Import Project="..\msvc\vsprops\Targets.props" />
-</Project>
\ No newline at end of file
diff --git a/dep/zlib/zlib.vcxproj.filters b/dep/zlib/zlib.vcxproj.filters
deleted file mode 100644
index 93a8b89a4..000000000
--- a/dep/zlib/zlib.vcxproj.filters
+++ /dev/null
@@ -1,33 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <ClInclude Include="include\zconf.h" />
-    <ClInclude Include="include\zlib.h" />
-    <ClInclude Include="src\crc32.h" />
-    <ClInclude Include="src\deflate.h" />
-    <ClInclude Include="src\gzguts.h" />
-    <ClInclude Include="src\inffast.h" />
-    <ClInclude Include="src\inffixed.h" />
-    <ClInclude Include="src\inflate.h" />
-    <ClInclude Include="src\inftrees.h" />
-    <ClInclude Include="src\trees.h" />
-    <ClInclude Include="src\zutil.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\crc32.c" />
-    <ClCompile Include="src\deflate.c" />
-    <ClCompile Include="src\gzclose.c" />
-    <ClCompile Include="src\gzlib.c" />
-    <ClCompile Include="src\gzread.c" />
-    <ClCompile Include="src\gzwrite.c" />
-    <ClCompile Include="src\infback.c" />
-    <ClCompile Include="src\inffast.c" />
-    <ClCompile Include="src\inflate.c" />
-    <ClCompile Include="src\inftrees.c" />
-    <ClCompile Include="src\trees.c" />
-    <ClCompile Include="src\uncompr.c" />
-    <ClCompile Include="src\zutil.c" />
-    <ClCompile Include="src\adler32.c" />
-    <ClCompile Include="src\compress.c" />
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/dep/zstd/CMakeLists.txt b/dep/zstd/CMakeLists.txt
deleted file mode 100644
index 12a41904f..000000000
--- a/dep/zstd/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-enable_language(C ASM)
-
-add_library(zstd
-  lib/common/debug.c
-  lib/common/entropy_common.c
-  lib/common/error_private.c
-  lib/common/fse_decompress.c
-  lib/common/pool.c
-  lib/common/threading.c
-  lib/common/xxhash.c
-  lib/common/zstd_common.c
-  lib/compress/fse_compress.c
-  lib/compress/hist.c
-  lib/compress/huf_compress.c
-  lib/compress/zstd_compress.c
-  lib/compress/zstd_compress_literals.c
-  lib/compress/zstd_compress_sequences.c
-  lib/compress/zstd_compress_superblock.c
-  lib/compress/zstd_double_fast.c
-  lib/compress/zstd_fast.c
-  lib/compress/zstd_lazy.c
-  lib/compress/zstd_ldm.c
-  lib/compress/zstdmt_compress.c
-  lib/compress/zstd_opt.c
-  lib/decompress/huf_decompress.c
-  lib/decompress/zstd_ddict.c
-  lib/decompress/zstd_decompress_block.c
-  lib/decompress/zstd_decompress.c
-)
-
-if(NOT MSVC AND CPU_ARCH_X64)
-  target_sources(zstd PRIVATE lib/decompress/huf_decompress_amd64.S)
-endif()
-
-target_include_directories(zstd PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/lib")
-disable_compiler_warnings_for_target(zstd)
-
-add_library(Zstd::Zstd ALIAS zstd)
diff --git a/dep/zstd/LICENSE b/dep/zstd/LICENSE
deleted file mode 100644
index a793a8028..000000000
--- a/dep/zstd/LICENSE
+++ /dev/null
@@ -1,30 +0,0 @@
-BSD License
-
-For Zstandard software
-
-Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
- * Neither the name Facebook nor the names of its contributors may be used to
-   endorse or promote products derived from this software without specific
-   prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/dep/zstd/lib/common/bitstream.h b/dep/zstd/lib/common/bitstream.h
deleted file mode 100644
index 84b6062ff..000000000
--- a/dep/zstd/lib/common/bitstream.h
+++ /dev/null
@@ -1,478 +0,0 @@
-/* ******************************************************************
- * bitstream
- * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-/*
-*  This API consists of small unitary functions, which must be inlined for best performance.
-*  Since link-time-optimization is not available for all compilers,
-*  these functions are defined into a .h to be included.
-*/
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include "mem.h"            /* unaligned access routines */
-#include "compiler.h"       /* UNLIKELY() */
-#include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
-#include "error_private.h"  /* error codes and messages */
-
-
-/*=========================================
-*  Target specific
-=========================================*/
-#ifndef ZSTD_NO_INTRINSICS
-#  if defined(__BMI__) && defined(__GNUC__)
-#    include <immintrin.h>   /* support for bextr (experimental) */
-#  elif defined(__ICCARM__)
-#    include <intrinsics.h>
-#  endif
-#endif
-
-#define STREAM_ACCUMULATOR_MIN_32  25
-#define STREAM_ACCUMULATOR_MIN_64  57
-#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
-
-
-/*-******************************************
-*  bitStream encoding API (write forward)
-********************************************/
-/* bitStream can mix input from multiple sources.
- * A critical property of these streams is that they encode and decode in **reverse** direction.
- * So the first bit sequence you add will be the last to be read, like a LIFO stack.
- */
-typedef struct {
-    size_t bitContainer;
-    unsigned bitPos;
-    char*  startPtr;
-    char*  ptr;
-    char*  endPtr;
-} BIT_CStream_t;
-
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
-MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
-MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
-MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
-
-/* Start with initCStream, providing the size of buffer to write into.
-*  bitStream will never write outside of this buffer.
-*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
-*
-*  bits are first added to a local register.
-*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-*  Writing data into memory is an explicit operation, performed by the flushBits function.
-*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-*  After a flushBits, a maximum of 7 bits might still be stored into local register.
-*
-*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
-*
-*  Last operation is to close the bitStream.
-*  The function returns the final size of CStream in bytes.
-*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
-*/
-
-
-/*-********************************************
-*  bitStream decoding API (read backward)
-**********************************************/
-typedef struct {
-    size_t   bitContainer;
-    unsigned bitsConsumed;
-    const char* ptr;
-    const char* start;
-    const char* limitPtr;
-} BIT_DStream_t;
-
-typedef enum { BIT_DStream_unfinished = 0,
-               BIT_DStream_endOfBuffer = 1,
-               BIT_DStream_completed = 2,
-               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
-               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
-
-
-/* Start by invoking BIT_initDStream().
-*  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-*  You can then retrieve bitFields stored into the local register, **in reverse order**.
-*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
-*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-*  Otherwise, it can be less than that, so proceed accordingly.
-*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
-*/
-
-
-/*-****************************************
-*  unsafe API
-******************************************/
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
-/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
-
-MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
-/* unsafe version; does not check buffer overflow */
-
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-
-
-/*-**************************************************************
-*  Internal functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (U32 val)
-{
-    assert(val != 0);
-    {
-#   if defined(_MSC_VER)   /* Visual */
-#       if STATIC_BMI2 == 1
-            return _lzcnt_u32(val) ^ 31;
-#       else
-            if (val != 0) {
-                unsigned long r;
-                _BitScanReverse(&r, val);
-                return (unsigned)r;
-            } else {
-                /* Should not reach this code path */
-                __assume(0);
-            }
-#       endif
-#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-        return __builtin_clz (val) ^ 31;
-#   elif defined(__ICCARM__)    /* IAR Intrinsic */
-        return 31 - __CLZ(val);
-#   else   /* Software version */
-        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
-                                                 11, 14, 16, 18, 22, 25,  3, 30,
-                                                  8, 12, 20, 28, 15, 17, 24,  7,
-                                                 19, 27, 23,  6, 26,  5,  4, 31 };
-        U32 v = val;
-        v |= v >> 1;
-        v |= v >> 2;
-        v |= v >> 4;
-        v |= v >> 8;
-        v |= v >> 16;
-        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
-#   endif
-    }
-}
-
-/*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = {
-    0,          1,         3,         7,         0xF,       0x1F,
-    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
-    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
-    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
-    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
-    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
-#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
-
-/*-**************************************************************
-*  bitStream encoding
-****************************************************************/
-/*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(size_t)
- *  @return : 0 if success,
- *            otherwise an error code (can be tested using ERR_isError()) */
-MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
-                                  void* startPtr, size_t dstCapacity)
-{
-    bitC->bitContainer = 0;
-    bitC->bitPos = 0;
-    bitC->startPtr = (char*)startPtr;
-    bitC->ptr = bitC->startPtr;
-    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
-    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
-    return 0;
-}
-
-/*! BIT_addBits() :
- *  can add up to 31 bits into `bitC`.
- *  Note : does not check for register overflow ! */
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
-                            size_t value, unsigned nbBits)
-{
-    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
-    assert(nbBits < BIT_MASK_SIZE);
-    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
-    bitC->bitPos += nbBits;
-}
-
-/*! BIT_addBitsFast() :
- *  works only if `value` is _clean_,
- *  meaning all high bits above nbBits are 0 */
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
-                                size_t value, unsigned nbBits)
-{
-    assert((value>>nbBits) == 0);
-    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    bitC->bitContainer |= value << bitC->bitPos;
-    bitC->bitPos += nbBits;
-}
-
-/*! BIT_flushBitsFast() :
- *  assumption : bitContainer has not overflowed
- *  unsafe version; does not check buffer overflow */
-MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
-{
-    size_t const nbBytes = bitC->bitPos >> 3;
-    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    assert(bitC->ptr <= bitC->endPtr);
-    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
-    bitC->ptr += nbBytes;
-    bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;
-}
-
-/*! BIT_flushBits() :
- *  assumption : bitContainer has not overflowed
- *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow.
- *  overflow will be revealed later on using BIT_closeCStream() */
-MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
-{
-    size_t const nbBytes = bitC->bitPos >> 3;
-    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    assert(bitC->ptr <= bitC->endPtr);
-    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
-    bitC->ptr += nbBytes;
-    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
-    bitC->bitPos &= 7;
-    bitC->bitContainer >>= nbBytes*8;
-}
-
-/*! BIT_closeCStream() :
- *  @return : size of CStream, in bytes,
- *            or 0 if it could not fit into dstBuffer */
-MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
-{
-    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
-    BIT_flushBits(bitC);
-    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
-    return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
-}
-
-
-/*-********************************************************
-*  bitStream decoding
-**********************************************************/
-/*! BIT_initDStream() :
- *  Initialize a BIT_DStream_t.
- * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
- * `srcSize` must be the *exact* size of the bitStream, in bytes.
- * @return : size of stream (== srcSize), or an errorCode if a problem is detected
- */
-MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
-{
-    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
-
-    bitD->start = (const char*)srcBuffer;
-    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
-
-    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
-        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
-        bitD->bitContainer = MEM_readLEST(bitD->ptr);
-        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
-          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
-    } else {
-        bitD->ptr   = bitD->start;
-        bitD->bitContainer = *(const BYTE*)(bitD->start);
-        switch(srcSize)
-        {
-        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
-                ZSTD_FALLTHROUGH;
-
-        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
-                ZSTD_FALLTHROUGH;
-
-        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
-                ZSTD_FALLTHROUGH;
-
-        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
-                ZSTD_FALLTHROUGH;
-
-        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
-                ZSTD_FALLTHROUGH;
-
-        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
-                ZSTD_FALLTHROUGH;
-
-        default: break;
-        }
-        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
-        }
-        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
-    }
-
-    return srcSize;
-}
-
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
-{
-    return bitContainer >> start;
-}
-
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
-{
-    U32 const regMask = sizeof(bitContainer)*8 - 1;
-    /* if start > regMask, bitstream is corrupted, and result is undefined */
-    assert(nbBits < BIT_MASK_SIZE);
-    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
-     * than accessing memory. When bmi2 instruction is not present, we consider
-     * such cpus old (pre-Haswell, 2013) and their performance is not of that
-     * importance.
-     */
-#if defined(__x86_64__) || defined(_M_X86)
-    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
-#else
-    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
-#endif
-}
-
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-{
-#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
-	return  _bzhi_u64(bitContainer, nbBits);
-#else
-    assert(nbBits < BIT_MASK_SIZE);
-    return bitContainer & BIT_mask[nbBits];
-#endif
-}
-
-/*! BIT_lookBits() :
- *  Provides next n bits from local register.
- *  local register is not modified.
- *  On 32-bits, maxNbBits==24.
- *  On 64-bits, maxNbBits==56.
- * @return : value extracted */
-MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
-{
-    /* arbitrate between double-shift and shift+mask */
-#if 1
-    /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
-     * bitstream is likely corrupted, and result is undefined */
-    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
-#else
-    /* this code path is slower on my os-x laptop */
-    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
-    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
-#endif
-}
-
-/*! BIT_lookBitsFast() :
- *  unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
-{
-    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
-    assert(nbBits >= 1);
-    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
-}
-
-MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
-{
-    bitD->bitsConsumed += nbBits;
-}
-
-/*! BIT_readBits() :
- *  Read (consume) next n bits from local register and update.
- *  Pay attention to not read more than nbBits contained into local register.
- * @return : extracted value. */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
-{
-    size_t const value = BIT_lookBits(bitD, nbBits);
-    BIT_skipBits(bitD, nbBits);
-    return value;
-}
-
-/*! BIT_readBitsFast() :
- *  unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
-{
-    size_t const value = BIT_lookBitsFast(bitD, nbBits);
-    assert(nbBits >= 1);
-    BIT_skipBits(bitD, nbBits);
-    return value;
-}
-
-/*! BIT_reloadDStreamFast() :
- *  Similar to BIT_reloadDStream(), but with two differences:
- *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
- *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
- *     point you must use BIT_reloadDStream() to reload.
- */
-MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
-{
-    if (UNLIKELY(bitD->ptr < bitD->limitPtr))
-        return BIT_DStream_overflow;
-    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
-    bitD->ptr -= bitD->bitsConsumed >> 3;
-    bitD->bitsConsumed &= 7;
-    bitD->bitContainer = MEM_readLEST(bitD->ptr);
-    return BIT_DStream_unfinished;
-}
-
-/*! BIT_reloadDStream() :
- *  Refill `bitD` from buffer previously set in BIT_initDStream() .
- *  This function is safe, it guarantees it will not read beyond src buffer.
- * @return : status of `BIT_DStream_t` internal register.
- *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
-{
-    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
-        return BIT_DStream_overflow;
-
-    if (bitD->ptr >= bitD->limitPtr) {
-        return BIT_reloadDStreamFast(bitD);
-    }
-    if (bitD->ptr == bitD->start) {
-        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
-        return BIT_DStream_completed;
-    }
-    /* start < ptr < limitPtr */
-    {   U32 nbBytes = bitD->bitsConsumed >> 3;
-        BIT_DStream_status result = BIT_DStream_unfinished;
-        if (bitD->ptr - nbBytes < bitD->start) {
-            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
-            result = BIT_DStream_endOfBuffer;
-        }
-        bitD->ptr -= nbBytes;
-        bitD->bitsConsumed -= nbBytes*8;
-        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
-        return result;
-    }
-}
-
-/*! BIT_endOfDStream() :
- * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
- */
-MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
-{
-    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* BITSTREAM_H_MODULE */
diff --git a/dep/zstd/lib/common/compiler.h b/dep/zstd/lib/common/compiler.h
deleted file mode 100644
index 516930c01..000000000
--- a/dep/zstd/lib/common/compiler.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_COMPILER_H
-#define ZSTD_COMPILER_H
-
-#include "portability_macros.h"
-
-/*-*******************************************************
-*  Compiler specifics
-*********************************************************/
-/* force inlining */
-
-#if !defined(ZSTD_NO_INLINE)
-#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#  define INLINE_KEYWORD inline
-#else
-#  define INLINE_KEYWORD
-#endif
-
-#if defined(__GNUC__) || defined(__ICCARM__)
-#  define FORCE_INLINE_ATTR __attribute__((always_inline))
-#elif defined(_MSC_VER)
-#  define FORCE_INLINE_ATTR __forceinline
-#else
-#  define FORCE_INLINE_ATTR
-#endif
-
-#else
-
-#define INLINE_KEYWORD
-#define FORCE_INLINE_ATTR
-
-#endif
-
-/**
-  On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
-  This explicitly marks such functions as __cdecl so that the code will still compile
-  if a CC other than __cdecl has been made the default.
-*/
-#if  defined(_MSC_VER)
-#  define WIN_CDECL __cdecl
-#else
-#  define WIN_CDECL
-#endif
-
-/**
- * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
- * parameters. They must be inlined for the compiler to eliminate the constant
- * branches.
- */
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
-/**
- * HINT_INLINE is used to help the compiler generate better code. It is *not*
- * used for "templates", so it can be tweaked based on the compilers
- * performance.
- *
- * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
- * always_inline attribute.
- *
- * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
- * attribute.
- */
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
-#  define HINT_INLINE static INLINE_KEYWORD
-#else
-#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
-#endif
-
-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
-#if defined(__GNUC__)
-#  define UNUSED_ATTR __attribute__((unused))
-#else
-#  define UNUSED_ATTR
-#endif
-
-/* force no inlining */
-#ifdef _MSC_VER
-#  define FORCE_NOINLINE static __declspec(noinline)
-#else
-#  if defined(__GNUC__) || defined(__ICCARM__)
-#    define FORCE_NOINLINE static __attribute__((__noinline__))
-#  else
-#    define FORCE_NOINLINE static
-#  endif
-#endif
-
-
-/* target attribute */
-#if defined(__GNUC__) || defined(__ICCARM__)
-#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
-#else
-#  define TARGET_ATTRIBUTE(target)
-#endif
-
-/* Target attribute for BMI2 dynamic dispatch.
- * Enable lzcnt, bmi, and bmi2.
- * We test for bmi1 & bmi2. lzcnt is included in bmi1.
- */
-#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
-
-/* prefetch
- * can be disabled, by declaring NO_PREFETCH build macro */
-#if defined(NO_PREFETCH)
-#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
-#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
-#else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
-#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
-#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
-#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
-#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
-#  elif defined(__aarch64__)
-#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
-#  else
-#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
-#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
-#  endif
-#endif  /* NO_PREFETCH */
-
-#define CACHELINE_SIZE 64
-
-#define PREFETCH_AREA(p, s)  {            \
-    const char* const _ptr = (const char*)(p);  \
-    size_t const _size = (size_t)(s);     \
-    size_t _pos;                          \
-    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
-        PREFETCH_L2(_ptr + _pos);         \
-    }                                     \
-}
-
-/* vectorization
- * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
- * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
-#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
-#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
-#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
-#  else
-#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
-#  endif
-#else
-#  define DONT_VECTORIZE
-#endif
-
-/* Tell the compiler that a branch is likely or unlikely.
- * Only use these macros if it causes the compiler to generate better code.
- * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
- * and clang, please do.
- */
-#if defined(__GNUC__)
-#define LIKELY(x) (__builtin_expect((x), 1))
-#define UNLIKELY(x) (__builtin_expect((x), 0))
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
-
-/* disable warnings */
-#ifdef _MSC_VER    /* Visual Studio */
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
-#endif
-
-/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
-#ifndef STATIC_BMI2
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
-#    ifdef __AVX2__  //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
-#       define STATIC_BMI2 1
-#    endif
-#  endif
-#endif
-
-#ifndef STATIC_BMI2
-    #define STATIC_BMI2 0
-#endif
-
-/* compile time determination of SIMD support */
-#if !defined(ZSTD_NO_INTRINSICS)
-#  if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
-#    define ZSTD_ARCH_X86_SSE2
-#  endif
-#  if defined(__ARM_NEON) || defined(_M_ARM64)
-#    define ZSTD_ARCH_ARM_NEON
-#  endif
-#
-#  if defined(ZSTD_ARCH_X86_SSE2)
-#    include <emmintrin.h>
-#  elif defined(ZSTD_ARCH_ARM_NEON)
-#    include <arm_neon.h>
-#  endif
-#endif
-
-/* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
-# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
-#else
-# define ZSTD_HAS_C_ATTRIBUTE(x) 0
-#endif
-
-/* Only use C++ attributes in C++. Some compilers report support for C++
- * attributes when compiling with C.
- */
-#if defined(__cplusplus) && defined(__has_cpp_attribute)
-# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
-#else
-# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
-#endif
-
-/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
- * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
- * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
- * - Else: __attribute__((__fallthrough__))
- */
-#ifndef ZSTD_FALLTHROUGH
-# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
-#  define ZSTD_FALLTHROUGH [[fallthrough]]
-# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
-#  define ZSTD_FALLTHROUGH [[fallthrough]]
-# elif __has_attribute(__fallthrough__)
-/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
- * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
- */
-#  define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
-# else
-#  define ZSTD_FALLTHROUGH
-# endif
-#endif
-
-/*-**************************************************************
-*  Alignment check
-*****************************************************************/
-
-/* this test was initially positioned in mem.h,
- * but this file is removed (or replaced) for linux kernel
- * so it's now hosted in compiler.h,
- * which remains valid for both user & kernel spaces.
- */
-
-#ifndef ZSTD_ALIGNOF
-# if defined(__GNUC__) || defined(_MSC_VER)
-/* covers gcc, clang & MSVC */
-/* note : this section must come first, before C11,
- * due to a limitation in the kernel source generator */
-#  define ZSTD_ALIGNOF(T) __alignof(T)
-
-# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
-/* C11 support */
-#  include <stdalign.h>
-#  define ZSTD_ALIGNOF(T) alignof(T)
-
-# else
-/* No known support for alignof() - imperfect backup */
-#  define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
-
-# endif
-#endif /* ZSTD_ALIGNOF */
-
-/*-**************************************************************
-*  Sanitizer
-*****************************************************************/
-
-#if ZSTD_MEMORY_SANITIZER
-/* Not all platforms that support msan provide sanitizers/msan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-#include <stddef.h>  /* size_t */
-#define ZSTD_DEPS_NEED_STDINT
-#include "zstd_deps.h"  /* intptr_t */
-
-/* Make memory region fully initialized (without changing its contents). */
-void __msan_unpoison(const volatile void *a, size_t size);
-
-/* Make memory region fully uninitialized (without changing its contents).
-   This is a legacy interface that does not update origin information. Use
-   __msan_allocated_memory() instead. */
-void __msan_poison(const volatile void *a, size_t size);
-
-/* Returns the offset of the first (at least partially) poisoned byte in the
-   memory range, or -1 if the whole range is good. */
-intptr_t __msan_test_shadow(const volatile void *x, size_t size);
-#endif
-
-#if ZSTD_ADDRESS_SANITIZER
-/* Not all platforms that support asan provide sanitizers/asan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-#include <stddef.h>  /* size_t */
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
- *
- * This memory must be previously allocated by your program. Instrumented
- * code is forbidden from accessing addresses in this region until it is
- * unpoisoned. This function is not guaranteed to poison the entire region -
- * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
- * alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can poison or
- * unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_poison_memory_region(void const volatile *addr, size_t size);
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
- *
- * This memory must be previously allocated by your program. Accessing
- * addresses in this region is allowed until this region is poisoned again.
- * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
- * to ASan alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can
- * poison or unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
-#endif
-
-#endif /* ZSTD_COMPILER_H */
diff --git a/dep/zstd/lib/common/cpu.h b/dep/zstd/lib/common/cpu.h
deleted file mode 100644
index 8acd33be3..000000000
--- a/dep/zstd/lib/common/cpu.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_COMMON_CPU_H
-#define ZSTD_COMMON_CPU_H
-
-/**
- * Implementation taken from folly/CpuId.h
- * https://github.com/facebook/folly/blob/master/folly/CpuId.h
- */
-
-#include "mem.h"
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
-typedef struct {
-    U32 f1c;
-    U32 f1d;
-    U32 f7b;
-    U32 f7c;
-} ZSTD_cpuid_t;
-
-MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
-    U32 f1c = 0;
-    U32 f1d = 0;
-    U32 f7b = 0;
-    U32 f7c = 0;
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
-    int reg[4];
-    __cpuid((int*)reg, 0);
-    {
-        int const n = reg[0];
-        if (n >= 1) {
-            __cpuid((int*)reg, 1);
-            f1c = (U32)reg[2];
-            f1d = (U32)reg[3];
-        }
-        if (n >= 7) {
-            __cpuidex((int*)reg, 7, 0);
-            f7b = (U32)reg[1];
-            f7c = (U32)reg[2];
-        }
-    }
-#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
-    /* The following block like the normal cpuid branch below, but gcc
-     * reserves ebx for use of its pic register so we must specially
-     * handle the save and restore to avoid clobbering the register
-     */
-    U32 n;
-    __asm__(
-        "pushl %%ebx\n\t"
-        "cpuid\n\t"
-        "popl %%ebx\n\t"
-        : "=a"(n)
-        : "a"(0)
-        : "ecx", "edx");
-    if (n >= 1) {
-      U32 f1a;
-      __asm__(
-          "pushl %%ebx\n\t"
-          "cpuid\n\t"
-          "popl %%ebx\n\t"
-          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
-          : "a"(1));
-    }
-    if (n >= 7) {
-      __asm__(
-          "pushl %%ebx\n\t"
-          "cpuid\n\t"
-          "movl %%ebx, %%eax\n\t"
-          "popl %%ebx"
-          : "=a"(f7b), "=c"(f7c)
-          : "a"(7), "c"(0)
-          : "edx");
-    }
-#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
-    U32 n;
-    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
-    if (n >= 1) {
-      U32 f1a;
-      __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
-    }
-    if (n >= 7) {
-      U32 f7a;
-      __asm__("cpuid"
-              : "=a"(f7a), "=b"(f7b), "=c"(f7c)
-              : "a"(7), "c"(0)
-              : "edx");
-    }
-#endif
-    {
-        ZSTD_cpuid_t cpuid;
-        cpuid.f1c = f1c;
-        cpuid.f1d = f1d;
-        cpuid.f7b = f7b;
-        cpuid.f7c = f7c;
-        return cpuid;
-    }
-}
-
-#define X(name, r, bit)                                                        \
-  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
-    return ((cpuid.r) & (1U << bit)) != 0;                                     \
-  }
-
-/* cpuid(1): Processor Info and Feature Bits. */
-#define C(name, bit) X(name, f1c, bit)
-  C(sse3, 0)
-  C(pclmuldq, 1)
-  C(dtes64, 2)
-  C(monitor, 3)
-  C(dscpl, 4)
-  C(vmx, 5)
-  C(smx, 6)
-  C(eist, 7)
-  C(tm2, 8)
-  C(ssse3, 9)
-  C(cnxtid, 10)
-  C(fma, 12)
-  C(cx16, 13)
-  C(xtpr, 14)
-  C(pdcm, 15)
-  C(pcid, 17)
-  C(dca, 18)
-  C(sse41, 19)
-  C(sse42, 20)
-  C(x2apic, 21)
-  C(movbe, 22)
-  C(popcnt, 23)
-  C(tscdeadline, 24)
-  C(aes, 25)
-  C(xsave, 26)
-  C(osxsave, 27)
-  C(avx, 28)
-  C(f16c, 29)
-  C(rdrand, 30)
-#undef C
-#define D(name, bit) X(name, f1d, bit)
-  D(fpu, 0)
-  D(vme, 1)
-  D(de, 2)
-  D(pse, 3)
-  D(tsc, 4)
-  D(msr, 5)
-  D(pae, 6)
-  D(mce, 7)
-  D(cx8, 8)
-  D(apic, 9)
-  D(sep, 11)
-  D(mtrr, 12)
-  D(pge, 13)
-  D(mca, 14)
-  D(cmov, 15)
-  D(pat, 16)
-  D(pse36, 17)
-  D(psn, 18)
-  D(clfsh, 19)
-  D(ds, 21)
-  D(acpi, 22)
-  D(mmx, 23)
-  D(fxsr, 24)
-  D(sse, 25)
-  D(sse2, 26)
-  D(ss, 27)
-  D(htt, 28)
-  D(tm, 29)
-  D(pbe, 31)
-#undef D
-
-/* cpuid(7): Extended Features. */
-#define B(name, bit) X(name, f7b, bit)
-  B(bmi1, 3)
-  B(hle, 4)
-  B(avx2, 5)
-  B(smep, 7)
-  B(bmi2, 8)
-  B(erms, 9)
-  B(invpcid, 10)
-  B(rtm, 11)
-  B(mpx, 14)
-  B(avx512f, 16)
-  B(avx512dq, 17)
-  B(rdseed, 18)
-  B(adx, 19)
-  B(smap, 20)
-  B(avx512ifma, 21)
-  B(pcommit, 22)
-  B(clflushopt, 23)
-  B(clwb, 24)
-  B(avx512pf, 26)
-  B(avx512er, 27)
-  B(avx512cd, 28)
-  B(sha, 29)
-  B(avx512bw, 30)
-  B(avx512vl, 31)
-#undef B
-#define C(name, bit) X(name, f7c, bit)
-  C(prefetchwt1, 0)
-  C(avx512vbmi, 1)
-#undef C
-
-#undef X
-
-#endif /* ZSTD_COMMON_CPU_H */
diff --git a/dep/zstd/lib/common/debug.c b/dep/zstd/lib/common/debug.c
deleted file mode 100644
index bb863c9ea..000000000
--- a/dep/zstd/lib/common/debug.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* ******************************************************************
- * debug
- * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-
-/*
- * This module only hosts one global variable
- * which can be used to dynamically influence the verbosity of traces,
- * such as DEBUGLOG and RAWLOG
- */
-
-#include "debug.h"
-
-int g_debuglevel = DEBUGLEVEL;
diff --git a/dep/zstd/lib/common/debug.h b/dep/zstd/lib/common/debug.h
deleted file mode 100644
index 3b2a320a1..000000000
--- a/dep/zstd/lib/common/debug.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* ******************************************************************
- * debug
- * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-
-/*
- * The purpose of this header is to enable debug functions.
- * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
- * and DEBUG_STATIC_ASSERT() for compile-time.
- *
- * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
- *
- * Level 1 enables assert() only.
- * Starting level 2, traces can be generated and pushed to stderr.
- * The higher the level, the more verbose the traces.
- *
- * It's possible to dynamically adjust level using variable g_debug_level,
- * which is only declared if DEBUGLEVEL>=2,
- * and is a global variable, not multi-thread protected (use with care)
- */
-
-#ifndef DEBUG_H_12987983217
-#define DEBUG_H_12987983217
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* static assert is triggered at compile time, leaving no runtime artefact.
- * static assert only works with compile-time constants.
- * Also, this variant can only be used inside a function. */
-#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
-
-
-/* DEBUGLEVEL is expected to be defined externally,
- * typically through compiler command line.
- * Value must be a number. */
-#ifndef DEBUGLEVEL
-#  define DEBUGLEVEL 0
-#endif
-
-
-/* recommended values for DEBUGLEVEL :
- * 0 : release mode, no debug, all run-time checks disabled
- * 1 : enables assert() only, no display
- * 2 : reserved, for currently active debug path
- * 3 : events once per object lifetime (CCtx, CDict, etc.)
- * 4 : events once per frame
- * 5 : events once per block
- * 6 : events once per sequence (verbose)
- * 7+: events at every position (*very* verbose)
- *
- * It's generally inconvenient to output traces > 5.
- * In which case, it's possible to selectively trigger high verbosity levels
- * by modifying g_debug_level.
- */
-
-#if (DEBUGLEVEL>=1)
-#  define ZSTD_DEPS_NEED_ASSERT
-#  include "zstd_deps.h"
-#else
-#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
-#    define assert(condition) ((void)0)   /* disable assert (default) */
-#  endif
-#endif
-
-#if (DEBUGLEVEL>=2)
-#  define ZSTD_DEPS_NEED_IO
-#  include "zstd_deps.h"
-extern int g_debuglevel; /* the variable is only declared,
-                            it actually lives in debug.c,
-                            and is shared by the whole process.
-                            It's not thread-safe.
-                            It's useful when enabling very verbose levels
-                            on selective conditions (such as position in src) */
-
-#  define RAWLOG(l, ...) {                                       \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
-            }   }
-#  define DEBUGLOG(l, ...) {                                     \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
-                    ZSTD_DEBUG_PRINT(" \n");                     \
-            }   }
-#else
-#  define RAWLOG(l, ...)      {}    /* disabled */
-#  define DEBUGLOG(l, ...)    {}    /* disabled */
-#endif
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* DEBUG_H_12987983217 */
diff --git a/dep/zstd/lib/common/entropy_common.c b/dep/zstd/lib/common/entropy_common.c
deleted file mode 100644
index 4229b40c5..000000000
--- a/dep/zstd/lib/common/entropy_common.c
+++ /dev/null
@@ -1,368 +0,0 @@
-/* ******************************************************************
- * Common functions of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include "mem.h"
-#include "error_private.h"       /* ERR_*, ERROR */
-#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
-#include "fse.h"
-#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
-#include "huf.h"
-
-
-/*===   Version   ===*/
-unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
-
-
-/*===   Error Management   ===*/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
-unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-static U32 FSE_ctz(U32 val)
-{
-    assert(val != 0);
-    {
-#   if defined(_MSC_VER)   /* Visual */
-        if (val != 0) {
-            unsigned long r;
-            _BitScanForward(&r, val);
-            return (unsigned)r;
-        } else {
-            /* Should not reach this code path */
-            __assume(0);
-        }
-#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return __builtin_ctz(val);
-#   elif defined(__ICCARM__)    /* IAR Intrinsic */
-        return __CTZ(val);
-#   else   /* Software version */
-        U32 count = 0;
-        while ((val & 1) == 0) {
-            val >>= 1;
-            ++count;
-        }
-        return count;
-#   endif
-    }
-}
-
-FORCE_INLINE_TEMPLATE
-size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-                           const void* headerBuffer, size_t hbSize)
-{
-    const BYTE* const istart = (const BYTE*) headerBuffer;
-    const BYTE* const iend = istart + hbSize;
-    const BYTE* ip = istart;
-    int nbBits;
-    int remaining;
-    int threshold;
-    U32 bitStream;
-    int bitCount;
-    unsigned charnum = 0;
-    unsigned const maxSV1 = *maxSVPtr + 1;
-    int previous0 = 0;
-
-    if (hbSize < 8) {
-        /* This function only works when hbSize >= 8 */
-        char buffer[8] = {0};
-        ZSTD_memcpy(buffer, headerBuffer, hbSize);
-        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
-                                                    buffer, sizeof(buffer));
-            if (FSE_isError(countSize)) return countSize;
-            if (countSize > hbSize) return ERROR(corruption_detected);
-            return countSize;
-    }   }
-    assert(hbSize >= 8);
-
-    /* init */
-    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
-    bitStream = MEM_readLE32(ip);
-    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
-    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
-    bitStream >>= 4;
-    bitCount = 4;
-    *tableLogPtr = nbBits;
-    remaining = (1<<nbBits)+1;
-    threshold = 1<<nbBits;
-    nbBits++;
-
-    for (;;) {
-        if (previous0) {
-            /* Count the number of repeats. Each time the
-             * 2-bit repeat code is 0b11 there is another
-             * repeat.
-             * Avoid UB by setting the high bit to 1.
-             */
-            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
-            while (repeats >= 12) {
-                charnum += 3 * 12;
-                if (LIKELY(ip <= iend-7)) {
-                    ip += 3;
-                } else {
-                    bitCount -= (int)(8 * (iend - 7 - ip));
-                    bitCount &= 31;
-                    ip = iend - 4;
-                }
-                bitStream = MEM_readLE32(ip) >> bitCount;
-                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
-            }
-            charnum += 3 * repeats;
-            bitStream >>= 2 * repeats;
-            bitCount += 2 * repeats;
-
-            /* Add the final repeat which isn't 0b11. */
-            assert((bitStream & 3) < 3);
-            charnum += bitStream & 3;
-            bitCount += 2;
-
-            /* This is an error, but break and return an error
-             * at the end, because returning out of a loop makes
-             * it harder for the compiler to optimize.
-             */
-            if (charnum >= maxSV1) break;
-
-            /* We don't need to set the normalized count to 0
-             * because we already memset the whole buffer to 0.
-             */
-
-            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
-                assert((bitCount >> 3) <= 3); /* For first condition to work */
-                ip += bitCount>>3;
-                bitCount &= 7;
-            } else {
-                bitCount -= (int)(8 * (iend - 4 - ip));
-                bitCount &= 31;
-                ip = iend - 4;
-            }
-            bitStream = MEM_readLE32(ip) >> bitCount;
-        }
-        {
-            int const max = (2*threshold-1) - remaining;
-            int count;
-
-            if ((bitStream & (threshold-1)) < (U32)max) {
-                count = bitStream & (threshold-1);
-                bitCount += nbBits-1;
-            } else {
-                count = bitStream & (2*threshold-1);
-                if (count >= threshold) count -= max;
-                bitCount += nbBits;
-            }
-
-            count--;   /* extra accuracy */
-            /* When it matters (small blocks), this is a
-             * predictable branch, because we don't use -1.
-             */
-            if (count >= 0) {
-                remaining -= count;
-            } else {
-                assert(count == -1);
-                remaining += count;
-            }
-            normalizedCounter[charnum++] = (short)count;
-            previous0 = !count;
-
-            assert(threshold > 1);
-            if (remaining < threshold) {
-                /* This branch can be folded into the
-                 * threshold update condition because we
-                 * know that threshold > 1.
-                 */
-                if (remaining <= 1) break;
-                nbBits = BIT_highbit32(remaining) + 1;
-                threshold = 1 << (nbBits - 1);
-            }
-            if (charnum >= maxSV1) break;
-
-            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
-                ip += bitCount>>3;
-                bitCount &= 7;
-            } else {
-                bitCount -= (int)(8 * (iend - 4 - ip));
-                bitCount &= 31;
-                ip = iend - 4;
-            }
-            bitStream = MEM_readLE32(ip) >> bitCount;
-    }   }
-    if (remaining != 1) return ERROR(corruption_detected);
-    /* Only possible when there are too many zeros. */
-    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
-    if (bitCount > 32) return ERROR(corruption_detected);
-    *maxSVPtr = charnum-1;
-
-    ip += (bitCount+7)>>3;
-    return ip-istart;
-}
-
-/* Avoids the FORCE_INLINE of the _body() function. */
-static size_t FSE_readNCount_body_default(
-        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-        const void* headerBuffer, size_t hbSize)
-{
-    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
-}
-
-#if DYNAMIC_BMI2
-BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
-        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-        const void* headerBuffer, size_t hbSize)
-{
-    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
-}
-#endif
-
-size_t FSE_readNCount_bmi2(
-        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-        const void* headerBuffer, size_t hbSize, int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
-    }
-#endif
-    (void)bmi2;
-    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
-}
-
-size_t FSE_readNCount(
-        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
-        const void* headerBuffer, size_t hbSize)
-{
-    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
-}
-
-
-/*! HUF_readStats() :
-    Read compact Huffman tree, saved by HUF_writeCTable().
-    `huffWeight` is destination buffer.
-    `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
-    @return : size read from `src` , or an error Code .
-    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
-*/
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
-                     const void* src, size_t srcSize)
-{
-    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                   U32* nbSymbolsPtr, U32* tableLogPtr,
-                   const void* src, size_t srcSize,
-                   void* workSpace, size_t wkspSize,
-                   int bmi2)
-{
-    U32 weightTotal;
-    const BYTE* ip = (const BYTE*) src;
-    size_t iSize;
-    size_t oSize;
-
-    if (!srcSize) return ERROR(srcSize_wrong);
-    iSize = ip[0];
-    /* ZSTD_memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
-
-    if (iSize >= 128) {  /* special header */
-        oSize = iSize - 127;
-        iSize = ((oSize+1)/2);
-        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        if (oSize >= hwSize) return ERROR(corruption_detected);
-        ip += 1;
-        {   U32 n;
-            for (n=0; n<oSize; n+=2) {
-                huffWeight[n]   = ip[n/2] >> 4;
-                huffWeight[n+1] = ip[n/2] & 15;
-    }   }   }
-    else  {   /* header compressed with FSE (normal case) */
-        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-        /* max (hwSize-1) values decoded, as last one is implied */
-        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
-        if (FSE_isError(oSize)) return oSize;
-    }
-
-    /* collect weight stats */
-    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
-    weightTotal = 0;
-    {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
-            rankStats[huffWeight[n]]++;
-            weightTotal += (1 << huffWeight[n]) >> 1;
-    }   }
-    if (weightTotal == 0) return ERROR(corruption_detected);
-
-    /* get last non-null symbol weight (implied, total must be 2^n) */
-    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
-        *tableLogPtr = tableLog;
-        /* determine last weight */
-        {   U32 const total = 1 << tableLog;
-            U32 const rest = total - weightTotal;
-            U32 const verif = 1 << BIT_highbit32(rest);
-            U32 const lastWeight = BIT_highbit32(rest) + 1;
-            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
-            huffWeight[oSize] = (BYTE)lastWeight;
-            rankStats[lastWeight]++;
-    }   }
-
-    /* check tree construction validity */
-    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
-
-    /* results */
-    *nbSymbolsPtr = (U32)(oSize+1);
-    return iSize+1;
-}
-
-/* Avoids the FORCE_INLINE of the _body() function. */
-static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
-                     const void* src, size_t srcSize,
-                     void* workSpace, size_t wkspSize)
-{
-    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
-}
-
-#if DYNAMIC_BMI2
-static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
-                     const void* src, size_t srcSize,
-                     void* workSpace, size_t wkspSize)
-{
-    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
-}
-#endif
-
-size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
-                     U32* nbSymbolsPtr, U32* tableLogPtr,
-                     const void* src, size_t srcSize,
-                     void* workSpace, size_t wkspSize,
-                     int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
-    }
-#endif
-    (void)bmi2;
-    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
-}
diff --git a/dep/zstd/lib/common/error_private.c b/dep/zstd/lib/common/error_private.c
deleted file mode 100644
index 6d1135f8c..000000000
--- a/dep/zstd/lib/common/error_private.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* The purpose of this file is to have a single list of error strings embedded in binary */
-
-#include "error_private.h"
-
-const char* ERR_getErrorString(ERR_enum code)
-{
-#ifdef ZSTD_STRIP_ERROR_STRINGS
-    (void)code;
-    return "Error strings stripped";
-#else
-    static const char* const notErrorCode = "Unspecified error code";
-    switch( code )
-    {
-    case PREFIX(no_error): return "No error detected";
-    case PREFIX(GENERIC):  return "Error (generic)";
-    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
-    case PREFIX(version_unsupported): return "Version not supported";
-    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
-    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
-    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
-    case PREFIX(parameter_unsupported): return "Unsupported parameter";
-    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
-    case PREFIX(init_missing): return "Context should be init first";
-    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
-    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
-    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
-    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
-    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
-    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
-    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
-    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
-    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
-    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
-    case PREFIX(srcSize_wrong): return "Src size is incorrect";
-    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
-        /* following error codes are not stable and may be removed or changed in a future version */
-    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
-    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
-    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
-    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
-    case PREFIX(maxCode):
-    default: return notErrorCode;
-    }
-#endif
-}
diff --git a/dep/zstd/lib/common/error_private.h b/dep/zstd/lib/common/error_private.h
deleted file mode 100644
index 007d81066..000000000
--- a/dep/zstd/lib/common/error_private.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/* ****************************************
-*  Dependencies
-******************************************/
-#include "../zstd_errors.h"  /* enum list */
-#include "compiler.h"
-#include "debug.h"
-#include "zstd_deps.h"       /* size_t */
-
-
-/* ****************************************
-*  Compiler-specific
-******************************************/
-#if defined(__GNUC__)
-#  define ERR_STATIC static __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#  define ERR_STATIC static inline
-#elif defined(_MSC_VER)
-#  define ERR_STATIC static __inline
-#else
-#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-
-/*-****************************************
-*  Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-
-/*-****************************************
-*  Error codes handling
-******************************************/
-#undef ERROR   /* already defined on Visual Studio */
-#define ERROR(name) ZSTD_ERROR(name)
-#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
-
-/* check and forward error code */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
-
-
-/*-****************************************
-*  Error Strings
-******************************************/
-
-const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
-
-ERR_STATIC const char* ERR_getErrorName(size_t code)
-{
-    return ERR_getErrorString(ERR_getErrorCode(code));
-}
-
-/**
- * Ignore: this is an internal helper.
- *
- * This is a helper function to help force C99-correctness during compilation.
- * Under strict compilation modes, variadic macro arguments can't be empty.
- * However, variadic function arguments can be. Using a function therefore lets
- * us statically check that at least one (string) argument was passed,
- * independent of the compilation flags.
- */
-static INLINE_KEYWORD UNUSED_ATTR
-void _force_has_format_string(const char *format, ...) {
-  (void)format;
-}
-
-/**
- * Ignore: this is an internal helper.
- *
- * We want to force this function invocation to be syntactically correct, but
- * we don't want to force runtime evaluation of its arguments.
- */
-#define _FORCE_HAS_FORMAT_STRING(...) \
-  if (0) { \
-    _force_has_format_string(__VA_ARGS__); \
-  }
-
-#define ERR_QUOTE(str) #str
-
-/**
- * Return the specified error if the condition evaluates to true.
- *
- * In debug modes, prints additional information.
- * In order to do that (particularly, printing the conditional that failed),
- * this can't just wrap RETURN_ERROR().
- */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
-
-/**
- * Unconditionally return the specified error.
- *
- * In debug modes, prints additional information.
- */
-#define RETURN_ERROR(err, ...) \
-  do { \
-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  } while(0);
-
-/**
- * If the provided expression evaluates to an error code, returns that error code.
- *
- * In debug modes, prints additional information.
- */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
-             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ERROR_H_MODULE */
diff --git a/dep/zstd/lib/common/fse.h b/dep/zstd/lib/common/fse.h
deleted file mode 100644
index 714bfd3e7..000000000
--- a/dep/zstd/lib/common/fse.h
+++ /dev/null
@@ -1,717 +0,0 @@
-/* ******************************************************************
- * FSE : Finite State Entropy codec
- * Public Prototypes declaration
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#ifndef FSE_H
-#define FSE_H
-
-
-/*-*****************************************
-*  Dependencies
-******************************************/
-#include "zstd_deps.h"    /* size_t, ptrdiff_t */
-
-
-/*-*****************************************
-*  FSE_PUBLIC_API : control library symbols visibility
-******************************************/
-#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
-#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
-#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
-#  define FSE_PUBLIC_API __declspec(dllexport)
-#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
-#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-#  define FSE_PUBLIC_API
-#endif
-
-/*------   Version   ------*/
-#define FSE_VERSION_MAJOR    0
-#define FSE_VERSION_MINOR    9
-#define FSE_VERSION_RELEASE  0
-
-#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
-#define FSE_QUOTE(str) #str
-#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
-#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
-
-#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
-FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /**< library version number; to be used when checking dll version */
-
-
-/*-****************************************
-*  FSE simple functions
-******************************************/
-/*! FSE_compress() :
-    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
-    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
-    @return : size of compressed data (<= dstCapacity).
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
-                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
-*/
-FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize);
-
-/*! FSE_decompress():
-    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
-    into already allocated destination buffer 'dst', of size 'dstCapacity'.
-    @return : size of regenerated data (<= maxDstSize),
-              or an error code, which can be tested using FSE_isError() .
-
-    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
-    Why ? : making this distinction requires a header.
-    Header management is intentionally delegated to the user layer, which can better manage special cases.
-*/
-FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
-                               const void* cSrc, size_t cSrcSize);
-
-
-/*-*****************************************
-*  Tool functions
-******************************************/
-FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
-
-/* Error Management */
-FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
-FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
-
-
-/*-*****************************************
-*  FSE advanced functions
-******************************************/
-/*! FSE_compress2() :
-    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
-    Both parameters can be defined as '0' to mean : use default value
-    @return : size of compressed data
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
-                     if FSE_isError(return), it's an error code.
-*/
-FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-
-
-/*-*****************************************
-*  FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[] (see hist.h)
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-
-/*! FSE_optimalTableLog():
-    dynamically downsize 'tableLog' when conditions are met.
-    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-    @return : recommended tableLog (necessarily <= 'maxTableLog') */
-FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
-    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
-    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-    useLowProbCount is a boolean parameter which trades off compressed size for
-    faster header decoding. When it is set to 1, the compressed data will be slightly
-    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
-    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
-    is a good default, since header deserialization makes a big speed difference.
-    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
-    @return : tableLog,
-              or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
-                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
-
-/*! FSE_NCountWriteBound():
-    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
-    Typically useful for allocation purpose. */
-FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_writeNCount():
-    Compactly save 'normalizedCounter' into 'buffer'.
-    @return : size of the compressed table,
-              or an errorCode, which can be tested using FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
-                                 const short* normalizedCounter,
-                                 unsigned maxSymbolValue, unsigned tableLog);
-
-/*! Constructor and Destructor of FSE_CTable.
-    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
-FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
-
-/*! FSE_buildCTable():
-    Builds `ct`, which must be already allocated, using FSE_createCTable().
-    @return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_compress_usingCTable():
-    Compress `src` using `ct` into `dst` which must be already allocated.
-    @return : size of compressed data (<= `dstCapacity`),
-              or 0 if compressed data could not fit into `dst`,
-              or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
-
-/*!
-Tutorial :
-----------
-The first step is to count all symbols. FSE_count() does this job very fast.
-Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
-'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
-maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
-FSE_count() will return the number of occurrence of the most frequent symbol.
-This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-The next step is to normalize the frequencies.
-FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
-It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
-You can use 'tableLog'==0 to mean "use default tableLog value".
-If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
-which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
-
-The result of FSE_normalizeCount() will be saved into a table,
-called 'normalizedCounter', which is a table of signed short.
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
-The return value is tableLog if everything proceeded as expected.
-It is 0 if there is a single symbol within distribution.
-If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
-'buffer' must be already allocated.
-For guaranteed success, buffer size must be at least FSE_headerBound().
-The result of the function is the number of bytes written into 'buffer'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
-
-'normalizedCounter' can then be used to create the compression table 'CTable'.
-The space required by 'CTable' must be already allocated, using FSE_createCTable().
-You can then use FSE_buildCTable() to fill 'CTable'.
-If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
-
-'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
-Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
-If it returns '0', compressed data could not fit into 'dst'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-*/
-
-
-/* *** DECOMPRESSION *** */
-
-/*! FSE_readNCount():
-    Read compactly saved 'normalizedCounter' from 'rBuffer'.
-    @return : size read from 'rBuffer',
-              or an errorCode, which can be tested using FSE_isError().
-              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
-                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
-                           const void* rBuffer, size_t rBuffSize);
-
-/*! FSE_readNCount_bmi2():
- * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
- */
-FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
-                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
-                           const void* rBuffer, size_t rBuffSize, int bmi2);
-
-/*! Constructor and Destructor of FSE_DTable.
-    Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
-FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
-
-/*! FSE_buildDTable():
-    Builds 'dt', which must be already allocated, using FSE_createDTable().
-    return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_decompress_usingDTable():
-    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
-    into `dst` which must be already allocated.
-    @return : size of regenerated data (necessarily <= `dstCapacity`),
-              or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
-`cSrcSize` must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
-
-#endif  /* FSE_H */
-
-#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
-#define FSE_H_FSE_STATIC_LINKING_ONLY
-
-/* *** Dependency *** */
-#include "bitstream.h"
-
-
-/* *****************************************
-*  Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
-
-/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
-#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
-#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
-
-
-/* *****************************************
- *  FSE advanced API
- ***************************************** */
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
-/**< same as FSE_optimalTableLog(), which used `minus==2` */
-
-/* FSE_compress_wksp() :
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
- */
-#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
-size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
-
-size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
-/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
- * See FSE_buildCTable_wksp() for breakdown of workspace usage.
- */
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
-size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-
-#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
-#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
-FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
-
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
-#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
-
-size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
-/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
-
-typedef enum {
-   FSE_repeat_none,  /**< Cannot use the previous table */
-   FSE_repeat_check, /**< Can use the previous table but it must be checked */
-   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
- } FSE_repeat;
-
-/* *****************************************
-*  FSE symbol compression API
-*******************************************/
-/*!
-   This API consists of small unitary functions, which highly benefit from being inlined.
-   Hence their body are included in next section.
-*/
-typedef struct {
-    ptrdiff_t   value;
-    const void* stateTable;
-    const void* symbolTT;
-    unsigned    stateLog;
-} FSE_CState_t;
-
-static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
-
-static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
-
-static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
-
-/**<
-These functions are inner components of FSE_compress_usingCTable().
-They allow the creation of custom streams, mixing multiple tables and bit sources.
-
-A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
-So the first symbol you will encode is the last you will decode, like a LIFO stack.
-
-You will need a few variables to track your CStream. They are :
-
-FSE_CTable    ct;         // Provided by FSE_buildCTable()
-BIT_CStream_t bitStream;  // bitStream tracking structure
-FSE_CState_t  state;      // State tracking structure (can have several)
-
-
-The first thing to do is to init bitStream and state.
-    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
-    FSE_initCState(&state, ct);
-
-Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
-You can then encode your input data, byte after byte.
-FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
-Remember decoding will be done in reverse direction.
-    FSE_encodeByte(&bitStream, &state, symbol);
-
-At any time, you can also add any bit sequence.
-Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
-    BIT_addBits(&bitStream, bitField, nbBits);
-
-The above methods don't commit data to memory, they just store it into local register, for speed.
-Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-Writing data to memory is a manual operation, performed by the flushBits function.
-    BIT_flushBits(&bitStream);
-
-Your last FSE encoding operation shall be to flush your last state value(s).
-    FSE_flushState(&bitStream, &state);
-
-Finally, you must close the bitStream.
-The function returns the size of CStream in bytes.
-If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
-If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
-    size_t size = BIT_closeCStream(&bitStream);
-*/
-
-
-/* *****************************************
-*  FSE symbol decompression API
-*******************************************/
-typedef struct {
-    size_t      state;
-    const void* table;   /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-
-static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
-
-/**<
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream;    // Stream context
-FSE_DState_t  DState;     // State context. Multiple ones are possible
-FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
-    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
-    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
-    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
-    size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
-    endSignal = FSE_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
-    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
-    BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
-    FSE_endOfDState(&DState);
-*/
-
-
-/* *****************************************
-*  FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-
-/* *****************************************
-*  Implementation of inlined functions
-*******************************************/
-typedef struct {
-    int deltaFindState;
-    U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
-{
-    const void* ptr = ct;
-    const U16* u16ptr = (const U16*) ptr;
-    const U32 tableLog = MEM_read16(ptr);
-    statePtr->value = (ptrdiff_t)1<<tableLog;
-    statePtr->stateTable = u16ptr+2;
-    statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
-    statePtr->stateLog = tableLog;
-}
-
-
-/*! FSE_initCState2() :
-*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
-*   uses the smallest state value possible, saving the cost of this symbol */
-MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
-{
-    FSE_initCState(statePtr, ct);
-    {   const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
-        const U16* stateTable = (const U16*)(statePtr->stateTable);
-        U32 nbBitsOut  = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
-        statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
-        statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-    }
-}
-
-MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
-{
-    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
-    const U16* const stateTable = (const U16*)(statePtr->stateTable);
-    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-    BIT_addBits(bitC, statePtr->value, nbBitsOut);
-    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-}
-
-MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
-{
-    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
-    BIT_flushBits(bitC);
-}
-
-
-/* FSE_getMaxNbBits() :
- * Approximate maximum cost of a symbol, in bits.
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
- * note 1 : assume symbolValue is valid (<= maxSymbolValue)
- * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
-MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
-{
-    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
-    return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
-}
-
-/* FSE_bitCost() :
- * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
- * note 1 : assume symbolValue is valid (<= maxSymbolValue)
- * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
-MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
-{
-    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
-    U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
-    U32 const threshold = (minNbBits+1) << 16;
-    assert(tableLog < 16);
-    assert(accuracyLog < 31-tableLog);  /* ensure enough room for renormalization double shift */
-    {   U32 const tableSize = 1 << tableLog;
-        U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
-        U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog;   /* linear interpolation (very approximate) */
-        U32 const bitMultiplier = 1 << accuracyLog;
-        assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
-        assert(normalizedDeltaFromThreshold <= bitMultiplier);
-        return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
-    }
-}
-
-
-/* ======    Decompression    ====== */
-
-typedef struct {
-    U16 tableLog;
-    U16 fastMode;
-} FSE_DTableHeader;   /* sizeof U32 */
-
-typedef struct
-{
-    unsigned short newState;
-    unsigned char  symbol;
-    unsigned char  nbBits;
-} FSE_decode_t;   /* size == U32 */
-
-MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
-{
-    const void* ptr = dt;
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
-    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
-    BIT_reloadDStream(bitD);
-    DStatePtr->table = dt + 1;
-}
-
-MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
-{
-    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    return DInfo.symbol;
-}
-
-MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
-    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    U32 const nbBits = DInfo.nbBits;
-    size_t const lowBits = BIT_readBits(bitD, nbBits);
-    DStatePtr->state = DInfo.newState + lowBits;
-}
-
-MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
-    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    U32 const nbBits = DInfo.nbBits;
-    BYTE const symbol = DInfo.symbol;
-    size_t const lowBits = BIT_readBits(bitD, nbBits);
-
-    DStatePtr->state = DInfo.newState + lowBits;
-    return symbol;
-}
-
-/*! FSE_decodeSymbolFast() :
-    unsafe, only works if no symbol has a probability > 50% */
-MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
-{
-    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
-    U32 const nbBits = DInfo.nbBits;
-    BYTE const symbol = DInfo.symbol;
-    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
-
-    DStatePtr->state = DInfo.newState + lowBits;
-    return symbol;
-}
-
-MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
-{
-    return DStatePtr->state == 0;
-}
-
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/* **************************************************************
-*  Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-*  Increasing memory usage improves compression ratio
-*  Reduced memory usage can improve speed, due to cache effect
-*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#ifndef FSE_MAX_MEMORY_USAGE
-#  define FSE_MAX_MEMORY_USAGE 14
-#endif
-#ifndef FSE_DEFAULT_MEMORY_USAGE
-#  define FSE_DEFAULT_MEMORY_USAGE 13
-#endif
-#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
-#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
-#endif
-
-/*!FSE_MAX_SYMBOL_VALUE :
-*  Maximum symbol value authorized.
-*  Required for proper stack allocation */
-#ifndef FSE_MAX_SYMBOL_VALUE
-#  define FSE_MAX_SYMBOL_VALUE 255
-#endif
-
-/* **************************************************************
-*  template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-
-#endif   /* !FSE_COMMONDEFS_ONLY */
-
-
-/* ***************************************************************
-*  Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
-#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
-
-
-#endif /* FSE_STATIC_LINKING_ONLY */
-
-
-#if defined (__cplusplus)
-}
-#endif
diff --git a/dep/zstd/lib/common/fse_decompress.c b/dep/zstd/lib/common/fse_decompress.c
deleted file mode 100644
index a5a358015..000000000
--- a/dep/zstd/lib/common/fse_decompress.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/* ******************************************************************
- * FSE : Finite State Entropy decoder
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "debug.h"      /* assert */
-#include "bitstream.h"
-#include "compiler.h"
-#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-#include "error_private.h"
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"
-
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_isError ERR_isError
-#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
-
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#  error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#  error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-
-/* Function templates */
-FSE_DTable* FSE_createDTable (unsigned tableLog)
-{
-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
-}
-
-void FSE_freeDTable (FSE_DTable* dt)
-{
-    ZSTD_free(dt);
-}
-
-static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
-{
-    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
-    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
-    U16* symbolNext = (U16*)workSpace;
-    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
-
-    U32 const maxSV1 = maxSymbolValue + 1;
-    U32 const tableSize = 1 << tableLog;
-    U32 highThreshold = tableSize-1;
-
-    /* Sanity Checks */
-    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
-    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-
-    /* Init, lay down lowprob symbols */
-    {   FSE_DTableHeader DTableH;
-        DTableH.tableLog = (U16)tableLog;
-        DTableH.fastMode = 1;
-        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
-            U32 s;
-            for (s=0; s<maxSV1; s++) {
-                if (normalizedCounter[s]==-1) {
-                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
-                    symbolNext[s] = 1;
-                } else {
-                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    symbolNext[s] = normalizedCounter[s];
-        }   }   }
-        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
-    }
-
-    /* Spread symbols */
-    if (highThreshold == tableSize - 1) {
-        size_t const tableMask = tableSize-1;
-        size_t const step = FSE_TABLESTEP(tableSize);
-        /* First lay down the symbols in order.
-         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
-         * misses since small blocks generally have small table logs, so nearly
-         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
-         * our buffer to handle the over-write.
-         */
-        {
-            U64 const add = 0x0101010101010101ull;
-            size_t pos = 0;
-            U64 sv = 0;
-            U32 s;
-            for (s=0; s<maxSV1; ++s, sv += add) {
-                int i;
-                int const n = normalizedCounter[s];
-                MEM_write64(spread + pos, sv);
-                for (i = 8; i < n; i += 8) {
-                    MEM_write64(spread + pos + i, sv);
-                }
-                pos += n;
-            }
-        }
-        /* Now we spread those positions across the table.
-         * The benefit of doing it in two stages is that we avoid the the
-         * variable size inner loop, which caused lots of branch misses.
-         * Now we can run through all the positions without any branch misses.
-         * We unroll the loop twice, since that is what emperically worked best.
-         */
-        {
-            size_t position = 0;
-            size_t s;
-            size_t const unroll = 2;
-            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
-            for (s = 0; s < (size_t)tableSize; s += unroll) {
-                size_t u;
-                for (u = 0; u < unroll; ++u) {
-                    size_t const uPosition = (position + (u * step)) & tableMask;
-                    tableDecode[uPosition].symbol = spread[s + u];
-                }
-                position = (position + (unroll * step)) & tableMask;
-            }
-            assert(position == 0);
-        }
-    } else {
-        U32 const tableMask = tableSize-1;
-        U32 const step = FSE_TABLESTEP(tableSize);
-        U32 s, position = 0;
-        for (s=0; s<maxSV1; s++) {
-            int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
-                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-                position = (position + step) & tableMask;
-                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
-        }   }
-        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-    }
-
-    /* Build Decoding table */
-    {   U32 u;
-        for (u=0; u<tableSize; u++) {
-            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U32 const nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
-            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
-    }   }
-
-    return 0;
-}
-
-size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
-{
-    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
-}
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/*-*******************************************************
-*  Decompression (Byte symbols)
-*********************************************************/
-size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
-
-    DTableH->tableLog = 0;
-    DTableH->fastMode = 0;
-
-    cell->newState = 0;
-    cell->symbol = symbolValue;
-    cell->nbBits = 0;
-
-    return 0;
-}
-
-
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
-    const unsigned tableSize = 1 << nbBits;
-    const unsigned tableMask = tableSize - 1;
-    const unsigned maxSV1 = tableMask+1;
-    unsigned s;
-
-    /* Sanity checks */
-    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
-
-    /* Build Decoding Table */
-    DTableH->tableLog = (U16)nbBits;
-    DTableH->fastMode = 1;
-    for (s=0; s<maxSV1; s++) {
-        dinfo[s].newState = 0;
-        dinfo[s].symbol = (BYTE)s;
-        dinfo[s].nbBits = (BYTE)nbBits;
-    }
-
-    return 0;
-}
-
-FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
-          void* dst, size_t maxDstSize,
-    const void* cSrc, size_t cSrcSize,
-    const FSE_DTable* dt, const unsigned fast)
-{
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const omax = op + maxDstSize;
-    BYTE* const olimit = omax-3;
-
-    BIT_DStream_t bitD;
-    FSE_DState_t state1;
-    FSE_DState_t state2;
-
-    /* Init */
-    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
-
-    FSE_initDState(&state1, &bitD, dt);
-    FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
-    /* 4 symbols per loop */
-    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
-        op[0] = FSE_GETSYMBOL(&state1);
-
-        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            BIT_reloadDStream(&bitD);
-
-        op[1] = FSE_GETSYMBOL(&state2);
-
-        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
-
-        op[2] = FSE_GETSYMBOL(&state1);
-
-        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
-            BIT_reloadDStream(&bitD);
-
-        op[3] = FSE_GETSYMBOL(&state2);
-    }
-
-    /* tail */
-    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
-    while (1) {
-        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
-        *op++ = FSE_GETSYMBOL(&state1);
-        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
-            *op++ = FSE_GETSYMBOL(&state2);
-            break;
-        }
-
-        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
-        *op++ = FSE_GETSYMBOL(&state2);
-        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
-            *op++ = FSE_GETSYMBOL(&state1);
-            break;
-    }   }
-
-    return op-ostart;
-}
-
-
-size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
-                            const void* cSrc, size_t cSrcSize,
-                            const FSE_DTable* dt)
-{
-    const void* ptr = dt;
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-    const U32 fastMode = DTableH->fastMode;
-
-    /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
-{
-    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
-}
-
-typedef struct {
-    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* Dynamically sized */
-} FSE_DecompressWksp;
-
-
-FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
-        void* dst, size_t dstCapacity,
-        const void* cSrc, size_t cSrcSize,
-        unsigned maxLog, void* workSpace, size_t wkspSize,
-        int bmi2)
-{
-    const BYTE* const istart = (const BYTE*)cSrc;
-    const BYTE* ip = istart;
-    unsigned tableLog;
-    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
-
-    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
-    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
-
-    /* normal FSE decoding mode */
-    {
-        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
-        if (FSE_isError(NCountLength)) return NCountLength;
-        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
-        assert(NCountLength <= cSrcSize);
-        ip += NCountLength;
-        cSrcSize -= NCountLength;
-    }
-
-    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
-    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
-
-    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
-
-    {
-        const void* ptr = wksp->dtable;
-        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-        const U32 fastMode = DTableH->fastMode;
-
-        /* select fast mode (static) */
-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
-    }
-}
-
-/* Avoids the FORCE_INLINE of the _body() function. */
-static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
-{
-    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
-}
-
-#if DYNAMIC_BMI2
-BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
-{
-    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
-}
-#endif
-
-size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
-    }
-#endif
-    (void)bmi2;
-    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
-}
-
-
-typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
-    U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
-    return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
-}
-
-size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
-{
-    /* Static analyzer seems unable to understand this table will be properly initialized later */
-    U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
-}
-#endif
-
-
-#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/dep/zstd/lib/common/huf.h b/dep/zstd/lib/common/huf.h
deleted file mode 100644
index 85518481e..000000000
--- a/dep/zstd/lib/common/huf.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/* ******************************************************************
- * huff0 huffman codec,
- * part of Finite State Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#ifndef HUF_H_298734234
-#define HUF_H_298734234
-
-/* *** Dependencies *** */
-#include "zstd_deps.h"    /* size_t */
-
-
-/* *** library symbols visibility *** */
-/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
- *        HUF symbols remain "private" (internal symbols for library only).
- *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
-#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
-#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
-#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
-#  define HUF_PUBLIC_API __declspec(dllexport)
-#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
-#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
-#else
-#  define HUF_PUBLIC_API
-#endif
-
-
-/* ========================== */
-/* ***  simple functions  *** */
-/* ========================== */
-
-/** HUF_compress() :
- *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
- * 'dst' buffer must be already allocated.
- *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
- * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
- * @return : size of compressed data (<= `dstCapacity`).
- *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
- *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
- */
-HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize);
-
-/** HUF_decompress() :
- *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
- *  into already allocated buffer 'dst', of minimum size 'dstSize'.
- * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
- *  Note : in contrast with FSE, HUF_decompress can regenerate
- *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
- *         because it knows size to regenerate (originalSize).
- * @return : size of regenerated data (== originalSize),
- *           or an error code, which can be tested using HUF_isError()
- */
-HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
-                               const void* cSrc, size_t cSrcSize);
-
-
-/* ***   Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /**< maximum input size for a single block compressed with HUF_compress */
-HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /**< maximum compressed size (worst case) */
-
-/* Error Management */
-HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /**< tells if a return value is an error code */
-HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /**< provides error code string (useful for debugging) */
-
-
-/* ***   Advanced function   *** */
-
-/** HUF_compress2() :
- *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
- * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
-HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                               unsigned maxSymbolValue, unsigned tableLog);
-
-/** HUF_compress4X_wksp() :
- *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
-#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
-HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     unsigned maxSymbolValue, unsigned tableLog,
-                                     void* workSpace, size_t wkspSize);
-
-#endif   /* HUF_H_298734234 */
-
-/* ******************************************************************
- *  WARNING !!
- *  The following section contains advanced and experimental definitions
- *  which shall never be used in the context of a dynamic library,
- *  because they are not guaranteed to remain stable in the future.
- *  Only consider them in association with static linking.
- * *****************************************************************/
-#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
-#define HUF_H_HUF_STATIC_LINKING_ONLY
-
-/* *** Dependencies *** */
-#include "mem.h"   /* U32 */
-#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-
-
-/* *** Constants *** */
-#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
-#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
-#define HUF_SYMBOLVALUE_MAX  255
-
-#define HUF_TABLELOG_ABSOLUTEMAX  12  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
-#  error "HUF_TABLELOG_MAX is too large !"
-#endif
-
-
-/* ****************************************
-*  Static allocation
-******************************************/
-/* HUF buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
-
-/* static allocation of HUF's Compression Table */
-/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
-typedef size_t HUF_CElt;   /* consider it an incomplete type */
-#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */
-#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
-#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
-
-/* static allocation of HUF's DTable */
-typedef U32 HUF_DTable;
-#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
-#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
-        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
-
-
-/* ****************************************
-*  Advanced decompression functions
-******************************************/
-size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
-#endif
-
-size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< decodes RLE and uncompressed */
-size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
-size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
-#endif
-
-
-/* ****************************************
- *  HUF detailed API
- * ****************************************/
-
-/*! HUF_compress() does the following:
- *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
- *  2. (optional) refine tableLog using HUF_optimalTableLog()
- *  3. build Huffman table from count using HUF_buildCTable()
- *  4. save Huffman table to memory buffer using HUF_writeCTable()
- *  5. encode the data stream using HUF_compress4X_usingCTable()
- *
- *  The following API allows targeting specific sub-functions for advanced tasks.
- *  For example, it's possible to compress several blocks using the same 'CTable',
- *  or to save and regenerate 'CTable' using external methods.
- */
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
-size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
-size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
-size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
-int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
-
-typedef enum {
-   HUF_repeat_none,  /**< Cannot use the previous table */
-   HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
-   HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
- } HUF_repeat;
-/** HUF_compress4X_repeat() :
- *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
- *  If it uses hufTable it does not modify hufTable or repeat.
- *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid.
- *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
-size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
-                       const void* src, size_t srcSize,
-                       unsigned maxSymbolValue, unsigned tableLog,
-                       void* workSpace, size_t wkspSize,    /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
- */
-#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
-#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
-size_t HUF_buildCTable_wksp (HUF_CElt* tree,
-                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
-                             void* workSpace, size_t wkspSize);
-
-/*! HUF_readStats() :
- *  Read compact Huffman tree, saved by HUF_writeCTable().
- * `huffWeight` is destination buffer.
- * @return : size read from `src` , or an error Code .
- *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
-                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
-                     const void* src, size_t srcSize);
-
-/*! HUF_readStats_wksp() :
- * Same as HUF_readStats() but takes an external workspace which must be
- * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
- * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
- */
-#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
-#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
-size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
-                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
-                          const void* src, size_t srcSize,
-                          void* workspace, size_t wkspSize,
-                          int bmi2);
-
-/** HUF_readCTable() :
- *  Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
-
-/** HUF_getNbBitsFromCTable() :
- *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private */
-U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
-
-/*
- * HUF_decompress() does the following:
- * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
- * 2. build Huffman table from save, using HUF_readDTableX?()
- * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
- */
-
-/** HUF_selectDecoder() :
- *  Tells which decoder is likely to decode faster,
- *  based on a set of pre-computed metrics.
- * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
- *  Assumption : 0 < dstSize <= 128 KB */
-U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
-
-/**
- *  The minimum workspace size for the `workSpace` used in
- *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
- *
- *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
- *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
- *  Buffer overflow errors may potentially occur if code modifications result in
- *  a required workspace size greater than that specified in the following
- *  macro.
- */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
-size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
-#endif
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
-size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
-#endif
-
-size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-#endif
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-#endif
-
-
-/* ====================== */
-/* single stream variants */
-/* ====================== */
-
-size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
-size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
-/** HUF_compress1X_repeat() :
- *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
- *  If it uses hufTable it does not modify hufTable or repeat.
- *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid.
- *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
-size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
-                       const void* src, size_t srcSize,
-                       unsigned maxSymbolValue, unsigned tableLog,
-                       void* workSpace, size_t wkspSize,   /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
-
-size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
-#endif
-
-size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< single-symbol decoder */
-size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< single-symbol decoder */
-#endif
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /**< double-symbols decoder */
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /**< double-symbols decoder */
-#endif
-
-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /**< automatic selection of sing or double symbol decoder, based on DTable */
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-#endif
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
-#endif
-
-/* BMI2 variants.
- * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
- */
-size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
-#endif
-size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
-size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
-#endif
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
-#endif
-
-#endif /* HUF_STATIC_LINKING_ONLY */
-
-#if defined (__cplusplus)
-}
-#endif
diff --git a/dep/zstd/lib/common/mem.h b/dep/zstd/lib/common/mem.h
deleted file mode 100644
index 85581c384..000000000
--- a/dep/zstd/lib/common/mem.h
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include <stddef.h>  /* size_t, ptrdiff_t */
-#include "compiler.h"  /* __has_builtin */
-#include "debug.h"  /* DEBUG_STATIC_ASSERT */
-#include "zstd_deps.h"  /* ZSTD_memcpy */
-
-
-/*-****************************************
-*  Compiler specifics
-******************************************/
-#if defined(_MSC_VER)   /* Visual Studio */
-#   include <stdlib.h>  /* _byteswap_ulong */
-#   include <intrin.h>  /* _byteswap_* */
-#endif
-#if defined(__GNUC__)
-#  define MEM_STATIC static __inline __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#  define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-#  define MEM_STATIC static __inline
-#else
-#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
-
-/*-**************************************************************
-*  Basic Types
-*****************************************************************/
-#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#  if defined(_AIX)
-#    include <inttypes.h>
-#  else
-#    include <stdint.h> /* intptr_t */
-#  endif
-  typedef   uint8_t BYTE;
-  typedef   uint8_t U8;
-  typedef    int8_t S8;
-  typedef  uint16_t U16;
-  typedef   int16_t S16;
-  typedef  uint32_t U32;
-  typedef   int32_t S32;
-  typedef  uint64_t U64;
-  typedef   int64_t S64;
-#else
-# include <limits.h>
-#if CHAR_BIT != 8
-#  error "this implementation requires char to be exactly 8-bit type"
-#endif
-  typedef unsigned char      BYTE;
-  typedef unsigned char      U8;
-  typedef   signed char      S8;
-#if USHRT_MAX != 65535
-#  error "this implementation requires short to be exactly 16-bit type"
-#endif
-  typedef unsigned short      U16;
-  typedef   signed short      S16;
-#if UINT_MAX != 4294967295
-#  error "this implementation requires int to be exactly 32-bit type"
-#endif
-  typedef unsigned int        U32;
-  typedef   signed int        S32;
-/* note : there are no limits defined for long long type in C90.
- * limits exist in C99, however, in such case, <stdint.h> is preferred */
-  typedef unsigned long long  U64;
-  typedef   signed long long  S64;
-#endif
-
-
-/*-**************************************************************
-*  Memory I/O API
-*****************************************************************/
-/*=== Static platform detection ===*/
-MEM_STATIC unsigned MEM_32bits(void);
-MEM_STATIC unsigned MEM_64bits(void);
-MEM_STATIC unsigned MEM_isLittleEndian(void);
-
-/*=== Native unaligned read/write ===*/
-MEM_STATIC U16 MEM_read16(const void* memPtr);
-MEM_STATIC U32 MEM_read32(const void* memPtr);
-MEM_STATIC U64 MEM_read64(const void* memPtr);
-MEM_STATIC size_t MEM_readST(const void* memPtr);
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value);
-MEM_STATIC void MEM_write32(void* memPtr, U32 value);
-MEM_STATIC void MEM_write64(void* memPtr, U64 value);
-
-/*=== Little endian unaligned read/write ===*/
-MEM_STATIC U16 MEM_readLE16(const void* memPtr);
-MEM_STATIC U32 MEM_readLE24(const void* memPtr);
-MEM_STATIC U32 MEM_readLE32(const void* memPtr);
-MEM_STATIC U64 MEM_readLE64(const void* memPtr);
-MEM_STATIC size_t MEM_readLEST(const void* memPtr);
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
-MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
-
-/*=== Big endian unaligned read/write ===*/
-MEM_STATIC U32 MEM_readBE32(const void* memPtr);
-MEM_STATIC U64 MEM_readBE64(const void* memPtr);
-MEM_STATIC size_t MEM_readBEST(const void* memPtr);
-
-MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
-MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
-MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
-
-/*=== Byteswap ===*/
-MEM_STATIC U32 MEM_swap32(U32 in);
-MEM_STATIC U64 MEM_swap64(U64 in);
-MEM_STATIC size_t MEM_swapST(size_t in);
-
-
-/*-**************************************************************
-*  Memory I/O Implementation
-*****************************************************************/
-/* MEM_FORCE_MEMORY_ACCESS :
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
- * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
- * The below switch allow to select different access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
- *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- *            It can generate buggy code on targets depending on alignment.
- *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
- * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
- * Prefer these methods in priority order (0 > 1 > 2)
- */
-#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
-#    define MEM_FORCE_MEMORY_ACCESS 1
-#  endif
-#endif
-
-MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
-MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
-
-MEM_STATIC unsigned MEM_isLittleEndian(void)
-{
-#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-    return 1;
-#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-    return 0;
-#elif defined(__clang__) && __LITTLE_ENDIAN__
-    return 1;
-#elif defined(__clang__) && __BIG_ENDIAN__
-    return 0;
-#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
-    return 1;
-#elif defined(__DMC__) && defined(_M_IX86)
-    return 1;
-#else
-    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
-    return one.c[0];
-#endif
-}
-
-#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
-
-/* violates C standard, by lying on structure alignment.
-Only use if no other choice to achieve best performance on target platform */
-MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
-MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
-MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
-MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
-
-#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
-
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
-/* currently only defined for gcc and icc */
-#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
-    __pragma( pack(push, 1) )
-    typedef struct { U16 v; } unalign16;
-    typedef struct { U32 v; } unalign32;
-    typedef struct { U64 v; } unalign64;
-    typedef struct { size_t v; } unalignArch;
-    __pragma( pack(pop) )
-#else
-    typedef struct { U16 v; } __attribute__((packed)) unalign16;
-    typedef struct { U32 v; } __attribute__((packed)) unalign32;
-    typedef struct { U64 v; } __attribute__((packed)) unalign64;
-    typedef struct { size_t v; } __attribute__((packed)) unalignArch;
-#endif
-
-MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
-MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
-MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
-MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
-MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
-MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
-
-#else
-
-/* default method, safe and standard.
-   can sometimes prove slower */
-
-MEM_STATIC U16 MEM_read16(const void* memPtr)
-{
-    U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U32 MEM_read32(const void* memPtr)
-{
-    U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC U64 MEM_read64(const void* memPtr)
-{
-    U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC size_t MEM_readST(const void* memPtr)
-{
-    size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
-}
-
-MEM_STATIC void MEM_write16(void* memPtr, U16 value)
-{
-    ZSTD_memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write32(void* memPtr, U32 value)
-{
-    ZSTD_memcpy(memPtr, &value, sizeof(value));
-}
-
-MEM_STATIC void MEM_write64(void* memPtr, U64 value)
-{
-    ZSTD_memcpy(memPtr, &value, sizeof(value));
-}
-
-#endif /* MEM_FORCE_MEMORY_ACCESS */
-
-MEM_STATIC U32 MEM_swap32(U32 in)
-{
-#if defined(_MSC_VER)     /* Visual Studio */
-    return _byteswap_ulong(in);
-#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
-  || (defined(__clang__) && __has_builtin(__builtin_bswap32))
-    return __builtin_bswap32(in);
-#else
-    return  ((in << 24) & 0xff000000 ) |
-            ((in <<  8) & 0x00ff0000 ) |
-            ((in >>  8) & 0x0000ff00 ) |
-            ((in >> 24) & 0x000000ff );
-#endif
-}
-
-MEM_STATIC U64 MEM_swap64(U64 in)
-{
-#if defined(_MSC_VER)     /* Visual Studio */
-    return _byteswap_uint64(in);
-#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
-  || (defined(__clang__) && __has_builtin(__builtin_bswap64))
-    return __builtin_bswap64(in);
-#else
-    return  ((in << 56) & 0xff00000000000000ULL) |
-            ((in << 40) & 0x00ff000000000000ULL) |
-            ((in << 24) & 0x0000ff0000000000ULL) |
-            ((in << 8)  & 0x000000ff00000000ULL) |
-            ((in >> 8)  & 0x00000000ff000000ULL) |
-            ((in >> 24) & 0x0000000000ff0000ULL) |
-            ((in >> 40) & 0x000000000000ff00ULL) |
-            ((in >> 56) & 0x00000000000000ffULL);
-#endif
-}
-
-MEM_STATIC size_t MEM_swapST(size_t in)
-{
-    if (MEM_32bits())
-        return (size_t)MEM_swap32((U32)in);
-    else
-        return (size_t)MEM_swap64((U64)in);
-}
-
-/*=== Little endian r/w ===*/
-
-MEM_STATIC U16 MEM_readLE16(const void* memPtr)
-{
-    if (MEM_isLittleEndian())
-        return MEM_read16(memPtr);
-    else {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U16)(p[0] + (p[1]<<8));
-    }
-}
-
-MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
-{
-    if (MEM_isLittleEndian()) {
-        MEM_write16(memPtr, val);
-    } else {
-        BYTE* p = (BYTE*)memPtr;
-        p[0] = (BYTE)val;
-        p[1] = (BYTE)(val>>8);
-    }
-}
-
-MEM_STATIC U32 MEM_readLE24(const void* memPtr)
-{
-    return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
-}
-
-MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
-{
-    MEM_writeLE16(memPtr, (U16)val);
-    ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
-}
-
-MEM_STATIC U32 MEM_readLE32(const void* memPtr)
-{
-    if (MEM_isLittleEndian())
-        return MEM_read32(memPtr);
-    else
-        return MEM_swap32(MEM_read32(memPtr));
-}
-
-MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
-{
-    if (MEM_isLittleEndian())
-        MEM_write32(memPtr, val32);
-    else
-        MEM_write32(memPtr, MEM_swap32(val32));
-}
-
-MEM_STATIC U64 MEM_readLE64(const void* memPtr)
-{
-    if (MEM_isLittleEndian())
-        return MEM_read64(memPtr);
-    else
-        return MEM_swap64(MEM_read64(memPtr));
-}
-
-MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
-{
-    if (MEM_isLittleEndian())
-        MEM_write64(memPtr, val64);
-    else
-        MEM_write64(memPtr, MEM_swap64(val64));
-}
-
-MEM_STATIC size_t MEM_readLEST(const void* memPtr)
-{
-    if (MEM_32bits())
-        return (size_t)MEM_readLE32(memPtr);
-    else
-        return (size_t)MEM_readLE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
-{
-    if (MEM_32bits())
-        MEM_writeLE32(memPtr, (U32)val);
-    else
-        MEM_writeLE64(memPtr, (U64)val);
-}
-
-/*=== Big endian r/w ===*/
-
-MEM_STATIC U32 MEM_readBE32(const void* memPtr)
-{
-    if (MEM_isLittleEndian())
-        return MEM_swap32(MEM_read32(memPtr));
-    else
-        return MEM_read32(memPtr);
-}
-
-MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
-{
-    if (MEM_isLittleEndian())
-        MEM_write32(memPtr, MEM_swap32(val32));
-    else
-        MEM_write32(memPtr, val32);
-}
-
-MEM_STATIC U64 MEM_readBE64(const void* memPtr)
-{
-    if (MEM_isLittleEndian())
-        return MEM_swap64(MEM_read64(memPtr));
-    else
-        return MEM_read64(memPtr);
-}
-
-MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
-{
-    if (MEM_isLittleEndian())
-        MEM_write64(memPtr, MEM_swap64(val64));
-    else
-        MEM_write64(memPtr, val64);
-}
-
-MEM_STATIC size_t MEM_readBEST(const void* memPtr)
-{
-    if (MEM_32bits())
-        return (size_t)MEM_readBE32(memPtr);
-    else
-        return (size_t)MEM_readBE64(memPtr);
-}
-
-MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
-{
-    if (MEM_32bits())
-        MEM_writeBE32(memPtr, (U32)val);
-    else
-        MEM_writeBE64(memPtr, (U64)val);
-}
-
-/* code only tested on 32 and 64 bits systems */
-MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* MEM_H_MODULE */
diff --git a/dep/zstd/lib/common/pool.c b/dep/zstd/lib/common/pool.c
deleted file mode 100644
index 2e37cdd73..000000000
--- a/dep/zstd/lib/common/pool.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/* ======   Dependencies   ======= */
-#include "zstd_deps.h" /* size_t */
-#include "debug.h"     /* assert */
-#include "zstd_internal.h"  /* ZSTD_customMalloc, ZSTD_customFree */
-#include "pool.h"
-
-/* ======   Compiler specifics   ====== */
-#if defined(_MSC_VER)
-#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
-#endif
-
-
-#ifdef ZSTD_MULTITHREAD
-
-#include "threading.h"   /* pthread adaptation */
-
-/* A job is a function and an opaque argument */
-typedef struct POOL_job_s {
-    POOL_function function;
-    void *opaque;
-} POOL_job;
-
-struct POOL_ctx_s {
-    ZSTD_customMem customMem;
-    /* Keep track of the threads */
-    ZSTD_pthread_t* threads;
-    size_t threadCapacity;
-    size_t threadLimit;
-
-    /* The queue is a circular buffer */
-    POOL_job *queue;
-    size_t queueHead;
-    size_t queueTail;
-    size_t queueSize;
-
-    /* The number of threads working on jobs */
-    size_t numThreadsBusy;
-    /* Indicates if the queue is empty */
-    int queueEmpty;
-
-    /* The mutex protects the queue */
-    ZSTD_pthread_mutex_t queueMutex;
-    /* Condition variable for pushers to wait on when the queue is full */
-    ZSTD_pthread_cond_t queuePushCond;
-    /* Condition variables for poppers to wait on when the queue is empty */
-    ZSTD_pthread_cond_t queuePopCond;
-    /* Indicates if the queue is shutting down */
-    int shutdown;
-};
-
-/* POOL_thread() :
- * Work thread for the thread pool.
- * Waits for jobs and executes them.
- * @returns : NULL on failure else non-null.
- */
-static void* POOL_thread(void* opaque) {
-    POOL_ctx* const ctx = (POOL_ctx*)opaque;
-    if (!ctx) { return NULL; }
-    for (;;) {
-        /* Lock the mutex and wait for a non-empty queue or until shutdown */
-        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-
-        while ( ctx->queueEmpty
-            || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
-            if (ctx->shutdown) {
-                /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
-                 * a few threads will be shutdown while !queueEmpty,
-                 * but enough threads will remain active to finish the queue */
-                ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-                return opaque;
-            }
-            ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
-        }
-        /* Pop a job off the queue */
-        {   POOL_job const job = ctx->queue[ctx->queueHead];
-            ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
-            ctx->numThreadsBusy++;
-            ctx->queueEmpty = (ctx->queueHead == ctx->queueTail);
-            /* Unlock the mutex, signal a pusher, and run the job */
-            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
-            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-
-            job.function(job.opaque);
-
-            /* If the intended queue size was 0, signal after finishing job */
-            ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-            ctx->numThreadsBusy--;
-            if (ctx->queueSize == 1) {
-                ZSTD_pthread_cond_signal(&ctx->queuePushCond);
-            }
-            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-        }
-    }  /* for (;;) */
-    assert(0);  /* Unreachable */
-}
-
-/* ZSTD_createThreadPool() : public access point */
-POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
-    return POOL_create (numThreads, 0);
-}
-
-POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
-    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
-}
-
-POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
-                               ZSTD_customMem customMem)
-{
-    POOL_ctx* ctx;
-    /* Check parameters */
-    if (!numThreads) { return NULL; }
-    /* Allocate the context and zero initialize */
-    ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem);
-    if (!ctx) { return NULL; }
-    /* Initialize the job queue.
-     * It needs one extra space since one space is wasted to differentiate
-     * empty and full queues.
-     */
-    ctx->queueSize = queueSize + 1;
-    ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem);
-    ctx->queueHead = 0;
-    ctx->queueTail = 0;
-    ctx->numThreadsBusy = 0;
-    ctx->queueEmpty = 1;
-    {
-        int error = 0;
-        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
-        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
-        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
-        if (error) { POOL_free(ctx); return NULL; }
-    }
-    ctx->shutdown = 0;
-    /* Allocate space for the thread handles */
-    ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
-    ctx->threadCapacity = 0;
-    ctx->customMem = customMem;
-    /* Check for errors */
-    if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
-    /* Initialize the threads */
-    {   size_t i;
-        for (i = 0; i < numThreads; ++i) {
-            if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
-                ctx->threadCapacity = i;
-                POOL_free(ctx);
-                return NULL;
-        }   }
-        ctx->threadCapacity = numThreads;
-        ctx->threadLimit = numThreads;
-    }
-    return ctx;
-}
-
-/*! POOL_join() :
-    Shutdown the queue, wake any sleeping threads, and join all of the threads.
-*/
-static void POOL_join(POOL_ctx* ctx) {
-    /* Shut down the queue */
-    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-    ctx->shutdown = 1;
-    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-    /* Wake up sleeping threads */
-    ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
-    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
-    /* Join all of the threads */
-    {   size_t i;
-        for (i = 0; i < ctx->threadCapacity; ++i) {
-            ZSTD_pthread_join(ctx->threads[i], NULL);  /* note : could fail */
-    }   }
-}
-
-void POOL_free(POOL_ctx *ctx) {
-    if (!ctx) { return; }
-    POOL_join(ctx);
-    ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
-    ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
-    ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
-    ZSTD_customFree(ctx->queue, ctx->customMem);
-    ZSTD_customFree(ctx->threads, ctx->customMem);
-    ZSTD_customFree(ctx, ctx->customMem);
-}
-
-void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
-  POOL_free (pool);
-}
-
-size_t POOL_sizeof(const POOL_ctx* ctx) {
-    if (ctx==NULL) return 0;  /* supports sizeof NULL */
-    return sizeof(*ctx)
-        + ctx->queueSize * sizeof(POOL_job)
-        + ctx->threadCapacity * sizeof(ZSTD_pthread_t);
-}
-
-
-/* @return : 0 on success, 1 on error */
-static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
-{
-    if (numThreads <= ctx->threadCapacity) {
-        if (!numThreads) return 1;
-        ctx->threadLimit = numThreads;
-        return 0;
-    }
-    /* numThreads > threadCapacity */
-    {   ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
-        if (!threadPool) return 1;
-        /* replace existing thread pool */
-        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
-        ZSTD_customFree(ctx->threads, ctx->customMem);
-        ctx->threads = threadPool;
-        /* Initialize additional threads */
-        {   size_t threadId;
-            for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
-                if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
-                    ctx->threadCapacity = threadId;
-                    return 1;
-            }   }
-    }   }
-    /* successfully expanded */
-    ctx->threadCapacity = numThreads;
-    ctx->threadLimit = numThreads;
-    return 0;
-}
-
-/* @return : 0 on success, 1 on error */
-int POOL_resize(POOL_ctx* ctx, size_t numThreads)
-{
-    int result;
-    if (ctx==NULL) return 1;
-    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-    result = POOL_resize_internal(ctx, numThreads);
-    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
-    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-    return result;
-}
-
-/**
- * Returns 1 if the queue is full and 0 otherwise.
- *
- * When queueSize is 1 (pool was created with an intended queueSize of 0),
- * then a queue is empty if there is a thread free _and_ no job is waiting.
- */
-static int isQueueFull(POOL_ctx const* ctx) {
-    if (ctx->queueSize > 1) {
-        return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
-    } else {
-        return (ctx->numThreadsBusy == ctx->threadLimit) ||
-               !ctx->queueEmpty;
-    }
-}
-
-
-static void
-POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
-{
-    POOL_job const job = {function, opaque};
-    assert(ctx != NULL);
-    if (ctx->shutdown) return;
-
-    ctx->queueEmpty = 0;
-    ctx->queue[ctx->queueTail] = job;
-    ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
-    ZSTD_pthread_cond_signal(&ctx->queuePopCond);
-}
-
-void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
-{
-    assert(ctx != NULL);
-    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-    /* Wait until there is space in the queue for the new job */
-    while (isQueueFull(ctx) && (!ctx->shutdown)) {
-        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
-    }
-    POOL_add_internal(ctx, function, opaque);
-    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-}
-
-
-int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
-{
-    assert(ctx != NULL);
-    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
-    if (isQueueFull(ctx)) {
-        ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-        return 0;
-    }
-    POOL_add_internal(ctx, function, opaque);
-    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
-    return 1;
-}
-
-
-#else  /* ZSTD_MULTITHREAD  not defined */
-
-/* ========================== */
-/* No multi-threading support */
-/* ========================== */
-
-
-/* We don't need any data, but if it is empty, malloc() might return NULL. */
-struct POOL_ctx_s {
-    int dummy;
-};
-static POOL_ctx g_poolCtx;
-
-POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
-    return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
-}
-
-POOL_ctx*
-POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
-{
-    (void)numThreads;
-    (void)queueSize;
-    (void)customMem;
-    return &g_poolCtx;
-}
-
-void POOL_free(POOL_ctx* ctx) {
-    assert(!ctx || ctx == &g_poolCtx);
-    (void)ctx;
-}
-
-int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
-    (void)ctx; (void)numThreads;
-    return 0;
-}
-
-void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
-    (void)ctx;
-    function(opaque);
-}
-
-int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
-    (void)ctx;
-    function(opaque);
-    return 1;
-}
-
-size_t POOL_sizeof(const POOL_ctx* ctx) {
-    if (ctx==NULL) return 0;  /* supports sizeof NULL */
-    assert(ctx == &g_poolCtx);
-    return sizeof(*ctx);
-}
-
-#endif  /* ZSTD_MULTITHREAD */
diff --git a/dep/zstd/lib/common/pool.h b/dep/zstd/lib/common/pool.h
deleted file mode 100644
index 0ebde1805..000000000
--- a/dep/zstd/lib/common/pool.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef POOL_H
-#define POOL_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-#include "zstd_deps.h"
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_customMem */
-#include "../zstd.h"
-
-typedef struct POOL_ctx_s POOL_ctx;
-
-/*! POOL_create() :
- *  Create a thread pool with at most `numThreads` threads.
- * `numThreads` must be at least 1.
- *  The maximum number of queued jobs before blocking is `queueSize`.
- * @return : POOL_ctx pointer on success, else NULL.
-*/
-POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
-
-POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
-                               ZSTD_customMem customMem);
-
-/*! POOL_free() :
- *  Free a thread pool returned by POOL_create().
- */
-void POOL_free(POOL_ctx* ctx);
-
-/*! POOL_resize() :
- *  Expands or shrinks pool's number of threads.
- *  This is more efficient than releasing + creating a new context,
- *  since it tries to preserve and re-use existing threads.
- * `numThreads` must be at least 1.
- * @return : 0 when resize was successful,
- *           !0 (typically 1) if there is an error.
- *    note : only numThreads can be resized, queueSize remains unchanged.
- */
-int POOL_resize(POOL_ctx* ctx, size_t numThreads);
-
-/*! POOL_sizeof() :
- * @return threadpool memory usage
- *  note : compatible with NULL (returns 0 in this case)
- */
-size_t POOL_sizeof(const POOL_ctx* ctx);
-
-/*! POOL_function :
- *  The function type that can be added to a thread pool.
- */
-typedef void (*POOL_function)(void*);
-
-/*! POOL_add() :
- *  Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
- *  Possibly blocks until there is room in the queue.
- *  Note : The function may be executed asynchronously,
- *         therefore, `opaque` must live until function has been completed.
- */
-void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
-
-
-/*! POOL_tryAdd() :
- *  Add the job `function(opaque)` to thread pool _if_ a queue slot is available.
- *  Returns immediately even if not (does not block).
- * @return : 1 if successful, 0 if not.
- */
-int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif
diff --git a/dep/zstd/lib/common/portability_macros.h b/dep/zstd/lib/common/portability_macros.h
deleted file mode 100644
index 2143817f5..000000000
--- a/dep/zstd/lib/common/portability_macros.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_PORTABILITY_MACROS_H
-#define ZSTD_PORTABILITY_MACROS_H
-
-/**
- * This header file contains macro defintions to support portability.
- * This header is shared between C and ASM code, so it MUST only
- * contain macro definitions. It MUST not contain any C code.
- *
- * This header ONLY defines macros to detect platforms/feature support.
- *
- */
-
-
-/* compat. with non-clang compilers */
-#ifndef __has_attribute
-  #define __has_attribute(x) 0
-#endif
-
-/* compat. with non-clang compilers */
-#ifndef __has_builtin
-#  define __has_builtin(x) 0
-#endif
-
-/* compat. with non-clang compilers */
-#ifndef __has_feature
-#  define __has_feature(x) 0
-#endif
-
-/* detects whether we are being compiled under msan */
-#ifndef ZSTD_MEMORY_SANITIZER
-#  if __has_feature(memory_sanitizer)
-#    define ZSTD_MEMORY_SANITIZER 1
-#  else
-#    define ZSTD_MEMORY_SANITIZER 0
-#  endif
-#endif
-
-/* detects whether we are being compiled under asan */
-#ifndef ZSTD_ADDRESS_SANITIZER
-#  if __has_feature(address_sanitizer)
-#    define ZSTD_ADDRESS_SANITIZER 1
-#  elif defined(__SANITIZE_ADDRESS__)
-#    define ZSTD_ADDRESS_SANITIZER 1
-#  else
-#    define ZSTD_ADDRESS_SANITIZER 0
-#  endif
-#endif
-
-/* detects whether we are being compiled under dfsan */
-#ifndef ZSTD_DATAFLOW_SANITIZER
-# if __has_feature(dataflow_sanitizer)
-#  define ZSTD_DATAFLOW_SANITIZER 1
-# else
-#  define ZSTD_DATAFLOW_SANITIZER 0
-# endif
-#endif
-
-/* Mark the internal assembly functions as hidden  */
-#ifdef __ELF__
-# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
-#else
-# define ZSTD_HIDE_ASM_FUNCTION(func)
-#endif
-
-/* Enable runtime BMI2 dispatch based on the CPU.
- * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
- */
-#ifndef DYNAMIC_BMI2
-  #if ((defined(__clang__) && __has_attribute(__target__)) \
-      || (defined(__GNUC__) \
-          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
-      && (defined(__x86_64__) || defined(_M_X64)) \
-      && !defined(__BMI2__)
-  #  define DYNAMIC_BMI2 1
-  #else
-  #  define DYNAMIC_BMI2 0
-  #endif
-#endif
-
-/**
- * Only enable assembly for GNUC comptabile compilers,
- * because other platforms may not support GAS assembly syntax.
- *
- * Only enable assembly for Linux / MacOS, other platforms may
- * work, but they haven't been tested. This could likely be
- * extended to BSD systems.
- *
- * Disable assembly when MSAN is enabled, because MSAN requires
- * 100% of code to be instrumented to work.
- */
-#if defined(__GNUC__)
-#  if defined(__linux__) || defined(__linux) || defined(__APPLE__)
-#    if ZSTD_MEMORY_SANITIZER
-#      define ZSTD_ASM_SUPPORTED 0
-#    elif ZSTD_DATAFLOW_SANITIZER
-#      define ZSTD_ASM_SUPPORTED 0
-#    else
-#      define ZSTD_ASM_SUPPORTED 1
-#    endif
-#  else
-#    define ZSTD_ASM_SUPPORTED 0
-#  endif
-#else
-#  define ZSTD_ASM_SUPPORTED 0
-#endif
-
-/**
- * Determines whether we should enable assembly for x86-64
- * with BMI2.
- *
- * Enable if all of the following conditions hold:
- * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
- * - Assembly is supported
- * - We are compiling for x86-64 and either:
- *   - DYNAMIC_BMI2 is enabled
- *   - BMI2 is supported at compile time
- */
-#if !defined(ZSTD_DISABLE_ASM) &&                                 \
-    ZSTD_ASM_SUPPORTED &&                                         \
-    defined(__x86_64__) &&                                        \
-    (DYNAMIC_BMI2 || defined(__BMI2__))
-# define ZSTD_ENABLE_ASM_X86_64_BMI2 1
-#else
-# define ZSTD_ENABLE_ASM_X86_64_BMI2 0
-#endif
-
-#endif /* ZSTD_PORTABILITY_MACROS_H */
diff --git a/dep/zstd/lib/common/threading.c b/dep/zstd/lib/common/threading.c
deleted file mode 100644
index 92cf57c19..000000000
--- a/dep/zstd/lib/common/threading.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * Copyright (c) 2016 Tino Reichardt
- * All rights reserved.
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/**
- * This file will hold wrapper for systems, which do not support pthreads
- */
-
-#include "threading.h"
-
-/* create fake symbol to avoid empty translation unit warning */
-int g_ZSTD_threading_useless_symbol;
-
-#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
-
-/**
- * Windows minimalist Pthread Wrapper, based on :
- * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
- */
-
-
-/* ===  Dependencies  === */
-#include <process.h>
-#include <errno.h>
-
-
-/* ===  Implementation  === */
-
-static unsigned __stdcall worker(void *arg)
-{
-    ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
-    thread->arg = thread->start_routine(thread->arg);
-    return 0;
-}
-
-int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
-            void* (*start_routine) (void*), void* arg)
-{
-    (void)unused;
-    thread->arg = arg;
-    thread->start_routine = start_routine;
-    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
-
-    if (!thread->handle)
-        return errno;
-    else
-        return 0;
-}
-
-int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
-{
-    DWORD result;
-
-    if (!thread.handle) return 0;
-
-    result = WaitForSingleObject(thread.handle, INFINITE);
-    switch (result) {
-    case WAIT_OBJECT_0:
-        if (value_ptr) *value_ptr = thread.arg;
-        return 0;
-    case WAIT_ABANDONED:
-        return EINVAL;
-    default:
-        return GetLastError();
-    }
-}
-
-#endif   /* ZSTD_MULTITHREAD */
-
-#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
-
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"
-
-int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
-{
-    *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
-    if (!*mutex)
-        return 1;
-    return pthread_mutex_init(*mutex, attr);
-}
-
-int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
-{
-    if (!*mutex)
-        return 0;
-    {
-        int const ret = pthread_mutex_destroy(*mutex);
-        ZSTD_free(*mutex);
-        return ret;
-    }
-}
-
-int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
-{
-    *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
-    if (!*cond)
-        return 1;
-    return pthread_cond_init(*cond, attr);
-}
-
-int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
-{
-    if (!*cond)
-        return 0;
-    {
-        int const ret = pthread_cond_destroy(*cond);
-        ZSTD_free(*cond);
-        return ret;
-    }
-}
-
-#endif
diff --git a/dep/zstd/lib/common/threading.h b/dep/zstd/lib/common/threading.h
deleted file mode 100644
index fd0060d5a..000000000
--- a/dep/zstd/lib/common/threading.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/**
- * Copyright (c) 2016 Tino Reichardt
- * All rights reserved.
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef THREADING_H_938743
-#define THREADING_H_938743
-
-#include "debug.h"
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
-
-/**
- * Windows minimalist Pthread Wrapper, based on :
- * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
- */
-#ifdef WINVER
-#  undef WINVER
-#endif
-#define WINVER       0x0600
-
-#ifdef _WIN32_WINNT
-#  undef _WIN32_WINNT
-#endif
-#define _WIN32_WINNT 0x0600
-
-#ifndef WIN32_LEAN_AND_MEAN
-#  define WIN32_LEAN_AND_MEAN
-#endif
-
-#undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
-#include <windows.h>
-#undef ERROR
-#define ERROR(name) ZSTD_ERROR(name)
-
-
-/* mutex */
-#define ZSTD_pthread_mutex_t           CRITICAL_SECTION
-#define ZSTD_pthread_mutex_init(a, b)  ((void)(b), InitializeCriticalSection((a)), 0)
-#define ZSTD_pthread_mutex_destroy(a)  DeleteCriticalSection((a))
-#define ZSTD_pthread_mutex_lock(a)     EnterCriticalSection((a))
-#define ZSTD_pthread_mutex_unlock(a)   LeaveCriticalSection((a))
-
-/* condition variable */
-#define ZSTD_pthread_cond_t             CONDITION_VARIABLE
-#define ZSTD_pthread_cond_init(a, b)    ((void)(b), InitializeConditionVariable((a)), 0)
-#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
-#define ZSTD_pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
-#define ZSTD_pthread_cond_signal(a)     WakeConditionVariable((a))
-#define ZSTD_pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
-
-/* ZSTD_pthread_create() and ZSTD_pthread_join() */
-typedef struct {
-    HANDLE handle;
-    void* (*start_routine)(void*);
-    void* arg;
-} ZSTD_pthread_t;
-
-int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
-                   void* (*start_routine) (void*), void* arg);
-
-int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
-
-/**
- * add here more wrappers as required
- */
-
-
-#elif defined(ZSTD_MULTITHREAD)    /* posix assumed ; need a better detection method */
-/* ===   POSIX Systems   === */
-#  include <pthread.h>
-
-#if DEBUGLEVEL < 1
-
-#define ZSTD_pthread_mutex_t            pthread_mutex_t
-#define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
-#define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
-#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock((a))
-#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock((a))
-
-#define ZSTD_pthread_cond_t             pthread_cond_t
-#define ZSTD_pthread_cond_init(a, b)    pthread_cond_init((a), (b))
-#define ZSTD_pthread_cond_destroy(a)    pthread_cond_destroy((a))
-#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait((a), (b))
-#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal((a))
-#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast((a))
-
-#define ZSTD_pthread_t                  pthread_t
-#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
-#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
-
-#else /* DEBUGLEVEL >= 1 */
-
-/* Debug implementation of threading.
- * In this implementation we use pointers for mutexes and condition variables.
- * This way, if we forget to init/destroy them the program will crash or ASAN
- * will report leaks.
- */
-
-#define ZSTD_pthread_mutex_t            pthread_mutex_t*
-int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
-int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
-#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
-#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))
-
-#define ZSTD_pthread_cond_t             pthread_cond_t*
-int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
-int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
-#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
-#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
-#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))
-
-#define ZSTD_pthread_t                  pthread_t
-#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
-#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
-
-#endif
-
-#else  /* ZSTD_MULTITHREAD not defined */
-/* No multithreading support */
-
-typedef int ZSTD_pthread_mutex_t;
-#define ZSTD_pthread_mutex_init(a, b)   ((void)(a), (void)(b), 0)
-#define ZSTD_pthread_mutex_destroy(a)   ((void)(a))
-#define ZSTD_pthread_mutex_lock(a)      ((void)(a))
-#define ZSTD_pthread_mutex_unlock(a)    ((void)(a))
-
-typedef int ZSTD_pthread_cond_t;
-#define ZSTD_pthread_cond_init(a, b)    ((void)(a), (void)(b), 0)
-#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
-#define ZSTD_pthread_cond_wait(a, b)    ((void)(a), (void)(b))
-#define ZSTD_pthread_cond_signal(a)     ((void)(a))
-#define ZSTD_pthread_cond_broadcast(a)  ((void)(a))
-
-/* do not use ZSTD_pthread_t */
-
-#endif /* ZSTD_MULTITHREAD */
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* THREADING_H_938743 */
diff --git a/dep/zstd/lib/common/xxhash.c b/dep/zstd/lib/common/xxhash.c
deleted file mode 100644
index d49497cf1..000000000
--- a/dep/zstd/lib/common/xxhash.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - xxHash homepage: http://www.xxhash.com
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-*/
-
-
-
-/*
- * xxhash.c instantiates functions defined in xxhash.h
- */
-
-#define XXH_STATIC_LINKING_ONLY   /* access advanced declarations */
-#define XXH_IMPLEMENTATION   /* access definitions */
-
-#include "xxhash.h"
diff --git a/dep/zstd/lib/common/xxhash.h b/dep/zstd/lib/common/xxhash.h
deleted file mode 100644
index 8ebbfdd62..000000000
--- a/dep/zstd/lib/common/xxhash.h
+++ /dev/null
@@ -1,5686 +0,0 @@
-/*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - xxHash homepage: http://www.xxhash.com
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-*/
-
-
-#ifndef XXH_NO_XXH3
-# define XXH_NO_XXH3
-#endif
-
-#ifndef XXH_NAMESPACE
-# define XXH_NAMESPACE ZSTD_
-#endif
-
-/*!
- * @mainpage xxHash
- *
- * @file xxhash.h
- * xxHash prototypes and implementation
- */
-/* TODO: update */
-/* Notice extracted from xxHash homepage:
-
-xxHash is an extremely fast hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name            Speed       Q.Score   Author
-xxHash          5.4 GB/s     10
-CrapWow         3.2 GB/s      2       Andrew
-MurmurHash 3a   2.7 GB/s     10       Austin Appleby
-SpookyHash      2.0 GB/s     10       Bob Jenkins
-SBox            1.4 GB/s      9       Bret Mulvey
-Lookup3         1.2 GB/s      9       Bob Jenkins
-SuperFastHash   1.2 GB/s      1       Paul Hsieh
-CityHash64      1.05 GB/s    10       Pike & Alakuijala
-FNV             0.55 GB/s     5       Fowler, Noll, Vo
-CRC32           0.43 GB/s     9
-MD5-32          0.33 GB/s    10       Ronald L. Rivest
-SHA1-32         0.28 GB/s    10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-
-Note: SMHasher's CRC32 implementation is not the fastest one.
-Other speed-oriented implementations can be faster,
-especially in combination with PCLMUL instruction:
-https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
-
-A 64-bit version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bit applications only.
-Name     Speed on 64 bits    Speed on 32 bits
-XXH64       13.8 GB/s            1.9 GB/s
-XXH32        6.8 GB/s            6.0 GB/s
-*/
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* ****************************
- *  INLINE mode
- ******************************/
-/*!
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
- * Use these build macros to inline xxhash into the target unit.
- * Inlining improves performance on small inputs, especially when the length is
- * expressed as a compile-time constant:
- *
- *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
- *
- * It also keeps xxHash symbols private to the unit, so they are not exported.
- *
- * Usage:
- *     #define XXH_INLINE_ALL
- *     #include "xxhash.h"
- *
- * Do not compile and link xxhash.o as a separate object, as it is not useful.
- */
-#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
-    && !defined(XXH_INLINE_ALL_31684351384)
-   /* this section should be traversed only once */
-#  define XXH_INLINE_ALL_31684351384
-   /* give access to the advanced API, required to compile implementations */
-#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
-#  define XXH_STATIC_LINKING_ONLY
-   /* make all functions private */
-#  undef XXH_PUBLIC_API
-#  if defined(__GNUC__)
-#    define XXH_PUBLIC_API static __inline __attribute__((unused))
-#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#    define XXH_PUBLIC_API static inline
-#  elif defined(_MSC_VER)
-#    define XXH_PUBLIC_API static __inline
-#  else
-     /* note: this version may generate warnings for unused static functions */
-#    define XXH_PUBLIC_API static
-#  endif
-
-   /*
-    * This part deals with the special case where a unit wants to inline xxHash,
-    * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
-    * such as part of some previously included *.h header file.
-    * Without further action, the new include would just be ignored,
-    * and functions would effectively _not_ be inlined (silent failure).
-    * The following macros solve this situation by prefixing all inlined names,
-    * avoiding naming collision with previous inclusions.
-    */
-   /* Before that, we unconditionally #undef all symbols,
-    * in case they were already defined with XXH_NAMESPACE.
-    * They will then be redefined for XXH_INLINE_ALL
-    */
-#  undef XXH_versionNumber
-    /* XXH32 */
-#  undef XXH32
-#  undef XXH32_createState
-#  undef XXH32_freeState
-#  undef XXH32_reset
-#  undef XXH32_update
-#  undef XXH32_digest
-#  undef XXH32_copyState
-#  undef XXH32_canonicalFromHash
-#  undef XXH32_hashFromCanonical
-    /* XXH64 */
-#  undef XXH64
-#  undef XXH64_createState
-#  undef XXH64_freeState
-#  undef XXH64_reset
-#  undef XXH64_update
-#  undef XXH64_digest
-#  undef XXH64_copyState
-#  undef XXH64_canonicalFromHash
-#  undef XXH64_hashFromCanonical
-    /* XXH3_64bits */
-#  undef XXH3_64bits
-#  undef XXH3_64bits_withSecret
-#  undef XXH3_64bits_withSeed
-#  undef XXH3_64bits_withSecretandSeed
-#  undef XXH3_createState
-#  undef XXH3_freeState
-#  undef XXH3_copyState
-#  undef XXH3_64bits_reset
-#  undef XXH3_64bits_reset_withSeed
-#  undef XXH3_64bits_reset_withSecret
-#  undef XXH3_64bits_update
-#  undef XXH3_64bits_digest
-#  undef XXH3_generateSecret
-    /* XXH3_128bits */
-#  undef XXH128
-#  undef XXH3_128bits
-#  undef XXH3_128bits_withSeed
-#  undef XXH3_128bits_withSecret
-#  undef XXH3_128bits_reset
-#  undef XXH3_128bits_reset_withSeed
-#  undef XXH3_128bits_reset_withSecret
-#  undef XXH3_128bits_reset_withSecretandSeed
-#  undef XXH3_128bits_update
-#  undef XXH3_128bits_digest
-#  undef XXH128_isEqual
-#  undef XXH128_cmp
-#  undef XXH128_canonicalFromHash
-#  undef XXH128_hashFromCanonical
-    /* Finally, free the namespace itself */
-#  undef XXH_NAMESPACE
-
-    /* employ the namespace for XXH_INLINE_ALL */
-#  define XXH_NAMESPACE XXH_INLINE_
-   /*
-    * Some identifiers (enums, type names) are not symbols,
-    * but they must nonetheless be renamed to avoid redeclaration.
-    * Alternative solution: do not redeclare them.
-    * However, this requires some #ifdefs, and has a more dispersed impact.
-    * Meanwhile, renaming can be achieved in a single place.
-    */
-#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
-#  define XXH_OK XXH_IPREF(XXH_OK)
-#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
-#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
-#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
-#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
-#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
-#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
-#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
-#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
-#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
-#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
-#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
-#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
-   /* Ensure the header is parsed again, even if it was previously included */
-#  undef XXHASH_H_5627135585666179
-#  undef XXHASH_H_STATIC_13879238742
-#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
-
-
-
-/* ****************************************************************
- *  Stable API
- *****************************************************************/
-#ifndef XXHASH_H_5627135585666179
-#define XXHASH_H_5627135585666179 1
-
-
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-/* specific declaration modes for Windows */
-#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
-#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
-#    ifdef XXH_EXPORT
-#      define XXH_PUBLIC_API __declspec(dllexport)
-#    elif XXH_IMPORT
-#      define XXH_PUBLIC_API __declspec(dllimport)
-#    endif
-#  else
-#    define XXH_PUBLIC_API   /* do nothing */
-#  endif
-#endif
-
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-#  define XXH_NAMESPACE /* YOUR NAME HERE */
-#  undef XXH_NAMESPACE
-#endif
-
-#ifdef XXH_NAMESPACE
-#  define XXH_CAT(A,B) A##B
-#  define XXH_NAME2(A,B) XXH_CAT(A,B)
-#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
-/* XXH32 */
-#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
-#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
-#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
-#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
-#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
-#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
-#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
-#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
-#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
-/* XXH64 */
-#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
-#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
-#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
-#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
-#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
-#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
-#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
-#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
-#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
-/* XXH3_64bits */
-#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
-#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
-#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
-#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
-#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
-#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
-#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
-#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
-#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
-#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
-#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
-#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
-#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
-#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
-#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
-/* XXH3_128bits */
-#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
-#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
-#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
-#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
-#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
-#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
-#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
-#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
-#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
-#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
-#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
-#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
-#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
-#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
-#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
-#endif
-
-
-/* *************************************
-*  Version
-***************************************/
-#define XXH_VERSION_MAJOR    0
-#define XXH_VERSION_MINOR    8
-#define XXH_VERSION_RELEASE  1
-#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
-
-/*!
- * @brief Obtains the xxHash version.
- *
- * This is mostly useful when xxHash is compiled as a shared library,
- * since the returned value comes from the library, as opposed to header file.
- *
- * @return `XXH_VERSION_NUMBER` of the invoked library.
- */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
-
-
-/* ****************************
-*  Common basic types
-******************************/
-#include <stddef.h>   /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
-
-
-/*-**********************************************************************
-*  32-bit hash
-************************************************************************/
-#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
-/*!
- * @brief An unsigned 32-bit integer.
- *
- * Not necessarily defined to `uint32_t` but functionally equivalent.
- */
-typedef uint32_t XXH32_hash_t;
-
-#elif !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
-    typedef uint32_t XXH32_hash_t;
-
-#else
-#   include <limits.h>
-#   if UINT_MAX == 0xFFFFFFFFUL
-      typedef unsigned int XXH32_hash_t;
-#   else
-#     if ULONG_MAX == 0xFFFFFFFFUL
-        typedef unsigned long XXH32_hash_t;
-#     else
-#       error "unsupported platform: need a 32-bit type"
-#     endif
-#   endif
-#endif
-
-/*!
- * @}
- *
- * @defgroup xxh32_family XXH32 family
- * @ingroup public
- * Contains functions used in the classic 32-bit xxHash algorithm.
- *
- * @note
- *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
- *   Note that @ref xxh3_family provides competitive speed
- *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
- *
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
- * @see @ref xxh32_impl for implementation details
- * @{
- */
-
-/*!
- * @brief Calculates the 32-bit hash of @p input using xxHash32.
- *
- * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
- *
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- * @param seed The 32-bit seed to alter the hash's output predictably.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 32-bit hash value.
- *
- * @see
- *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
- */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
-
-/*!
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- * Example code for incrementally hashing a file:
- * @code{.c}
- *    #include <stdio.h>
- *    #include <xxhash.h>
- *    #define BUFFER_SIZE 256
- *
- *    // Note: XXH64 and XXH3 use the same interface.
- *    XXH32_hash_t
- *    hashFile(FILE* stream)
- *    {
- *        XXH32_state_t* state;
- *        unsigned char buf[BUFFER_SIZE];
- *        size_t amt;
- *        XXH32_hash_t hash;
- *
- *        state = XXH32_createState();       // Create a state
- *        assert(state != NULL);             // Error check here
- *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
- *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
- *            XXH32_update(state, buf, amt); // Hash the file in chunks
- *        }
- *        hash = XXH32_digest(state);        // Finalize the hash
- *        XXH32_freeState(state);            // Clean up
- *        return hash;
- *    }
- * @endcode
- */
-
-/*!
- * @typedef struct XXH32_state_s XXH32_state_t
- * @brief The opaque state struct for the XXH32 streaming API.
- *
- * @see XXH32_state_s for details.
- */
-typedef struct XXH32_state_s XXH32_state_t;
-
-/*!
- * @brief Allocates an @ref XXH32_state_t.
- *
- * Must be freed with XXH32_freeState().
- * @return An allocated XXH32_state_t on success, `NULL` on failure.
- */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
-/*!
- * @brief Frees an @ref XXH32_state_t.
- *
- * Must be allocated with XXH32_createState().
- * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- * @return XXH_OK.
- */
-XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
-/*!
- * @brief Copies one @ref XXH32_state_t to another.
- *
- * @param dst_state The state to copy to.
- * @param src_state The state to copy from.
- * @pre
- *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
- */
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
-
-/*!
- * @brief Resets an @ref XXH32_state_t to begin a new hash.
- *
- * This function resets and seeds a state. Call it before @ref XXH32_update().
- *
- * @param statePtr The state struct to reset.
- * @param seed The 32-bit seed to alter the hash result predictably.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
- */
-XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH32_state_t.
- *
- * Call this to incrementally consume blocks of data.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
- */
-XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated hash value from an @ref XXH32_state_t.
- *
- * @note
- *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *  @p statePtr must not be `NULL`.
- *
- * @return The calculated xxHash32 value from that state.
- */
-XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
-
-/*******   Canonical representation   *******/
-
-/*
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
- *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
- *
- * The following functions allow transformation of hash values to and from
- * canonical format.
- */
-
-/*!
- * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
- */
-typedef struct {
-    unsigned char digest[4]; /*!< Hash bytes, big endian */
-} XXH32_canonical_t;
-
-/*!
- * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
- *
- * @param dst The @ref XXH32_canonical_t pointer to be stored to.
- * @param hash The @ref XXH32_hash_t to be converted.
- *
- * @pre
- *   @p dst must not be `NULL`.
- */
-XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
-
-/*!
- * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
- *
- * @param src The @ref XXH32_canonical_t to convert.
- *
- * @pre
- *   @p src must not be `NULL`.
- *
- * @return The converted hash.
- */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
-
-
-#ifdef __has_attribute
-# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
-#else
-# define XXH_HAS_ATTRIBUTE(x) 0
-#endif
-
-/* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
-# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
-#else
-# define XXH_HAS_C_ATTRIBUTE(x) 0
-#endif
-
-#if defined(__cplusplus) && defined(__has_cpp_attribute)
-# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
-#else
-# define XXH_HAS_CPP_ATTRIBUTE(x) 0
-#endif
-
-/*
-Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
-introduced in CPP17 and C23.
-CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
-C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
-*/
-#if XXH_HAS_C_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_CPP_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
-#else
-# define XXH_FALLTHROUGH
-#endif
-
-/*!
- * @}
- * @ingroup public
- * @{
- */
-
-#ifndef XXH_NO_LONG_LONG
-/*-**********************************************************************
-*  64-bit hash
-************************************************************************/
-#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
-/*!
- * @brief An unsigned 64-bit integer.
- *
- * Not necessarily defined to `uint64_t` but functionally equivalent.
- */
-typedef uint64_t XXH64_hash_t;
-#elif !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#  include <stdint.h>
-   typedef uint64_t XXH64_hash_t;
-#else
-#  include <limits.h>
-#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
-     /* LP64 ABI says uint64_t is unsigned long */
-     typedef unsigned long XXH64_hash_t;
-#  else
-     /* the following type must have a width of 64-bit */
-     typedef unsigned long long XXH64_hash_t;
-#  endif
-#endif
-
-/*!
- * @}
- *
- * @defgroup xxh64_family XXH64 family
- * @ingroup public
- * @{
- * Contains functions used in the classic 64-bit xxHash algorithm.
- *
- * @note
- *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
- *   and offers true 64/128 bit hash results.
- *   It provides better speed for systems with vector processing capabilities.
- */
-
-
-/*!
- * @brief Calculates the 64-bit hash of @p input using xxHash64.
- *
- * This function usually runs faster on 64-bit systems, but slower on 32-bit
- * systems (see benchmark).
- *
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- * @param seed The 64-bit seed to alter the hash's output predictably.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 64-bit hash.
- *
- * @see
- *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
- */
-XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
-
-/*******   Streaming   *******/
-/*!
- * @brief The opaque state struct for the XXH64 streaming API.
- *
- * @see XXH64_state_s for details.
- */
-typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
-XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
-
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
-
-/*******   Canonical representation   *******/
-typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
-
-#ifndef XXH_NO_XXH3
-/*!
- * @}
- * ************************************************************************
- * @defgroup xxh3_family XXH3 family
- * @ingroup public
- * @{
- *
- * XXH3 is a more recent hash algorithm featuring:
- *  - Improved speed for both small and large inputs
- *  - True 64-bit and 128-bit outputs
- *  - SIMD acceleration
- *  - Improved 32-bit viability
- *
- * Speed analysis methodology is explained here:
- *
- *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
- *
- * Compared to XXH64, expect XXH3 to run approximately
- * ~2x faster on large inputs and >3x faster on small ones,
- * exact differences vary depending on platform.
- *
- * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
- * but does not require it.
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
- * can run XXH3 at competitive speeds, even without vector support.
- * Further details are explained in the implementation.
- *
- * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
- *
- * XXH3 implementation is portable:
- * it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generage exactly the same hash value on all platforms.
- * Starting from v0.8.0, it's also labelled "stable", meaning that
- * any future version will also generate the same hash value.
- *
- * XXH3 offers 2 variants, _64bits and _128bits.
- *
- * When only 64 bits are needed, prefer invoking the _64bits variant, as it
- * reduces the amount of mixing, resulting in faster speed on small inputs.
- * It's also generally simpler to manipulate a scalar return type than a struct.
- *
- * The API supports one-shot hashing, streaming mode, and custom secrets.
- */
-
-/*-**********************************************************************
-*  XXH3 64-bit variant
-************************************************************************/
-
-/* XXH3_64bits():
- * default 64-bit variant, using default secret and default seed of 0.
- * It's the fastest variant. */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
-
-/*
- * XXH3_64bits_withSeed():
- * This variant generates a custom secret on the fly
- * based on default secret altered using the `seed` value.
- * While this operation is decently fast, note that it's not completely free.
- * Note: seed==0 produces the same results as XXH3_64bits().
- */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-
-/*!
- * The bare minimum size for a custom secret.
- *
- * @see
- *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
- *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
- */
-#define XXH3_SECRET_SIZE_MIN 136
-
-/*
- * XXH3_64bits_withSecret():
- * It's possible to provide any blob of bytes as a "secret" to generate the hash.
- * This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
- * However, the quality of the secret impacts the dispersion of the hash algorithm.
- * Therefore, the secret _must_ look like a bunch of random bytes.
- * Avoid "trivial" or structured data such as repeated sequences or a text document.
- * Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing "XXH3_generateSecret()" instead (see below).
- * It will generate a proper high entropy secret derived from the blob of bytes.
- * Another advantage of using XXH3_generateSecret() is that
- * it guarantees that all bits within the initial blob of bytes
- * will impact every bit of the output.
- * This is not necessarily the case when using the blob of bytes directly
- * because, when hashing _small_ inputs, only a portion of the secret is employed.
- */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
-
-
-/*******   Streaming   *******/
-/*
- * Streaming requires state maintenance.
- * This operation costs memory and CPU.
- * As a consequence, streaming is slower than one-shot hashing.
- * For better performance, prefer one-shot functions whenever applicable.
- */
-
-/*!
- * @brief The state struct for the XXH3 streaming API.
- *
- * @see XXH3_state_s for details.
- */
-typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
-
-/*
- * XXH3_64bits_reset():
- * Initialize with default parameters.
- * digest will be equivalent to `XXH3_64bits()`.
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
-/*
- * XXH3_64bits_reset_withSeed():
- * Generate a custom secret from `seed`, and store it into `statePtr`.
- * digest will be equivalent to `XXH3_64bits_withSeed()`.
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*
- * XXH3_64bits_reset_withSecret():
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
- * and the quality of produced hash values depends on secret's entropy
- * (secret's content should look like a bunch of random bytes).
- * When in doubt about the randomness of a candidate `secret`,
- * consider employing `XXH3_generateSecret()` instead (see below).
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
-
-/* note : canonical representation of XXH3 is the same as XXH64
- * since they both produce XXH64_hash_t values */
-
-
-/*-**********************************************************************
-*  XXH3 128-bit variant
-************************************************************************/
-
-/*!
- * @brief The return value from 128-bit hashes.
- *
- * Stored in little endian order, although the fields themselves are in native
- * endianness.
- */
-typedef struct {
-    XXH64_hash_t low64;   /*!< `value & 0xFFFFFFFFFFFFFFFF` */
-    XXH64_hash_t high64;  /*!< `value >> 64` */
-} XXH128_hash_t;
-
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
-
-/*******   Streaming   *******/
-/*
- * Streaming requires state maintenance.
- * This operation costs memory and CPU.
- * As a consequence, streaming is slower than one-shot hashing.
- * For better performance, prefer one-shot functions whenever applicable.
- *
- * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
- * Use already declared XXH3_createState() and XXH3_freeState().
- *
- * All reset and streaming functions have same meaning as their 64-bit counterpart.
- */
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
-
-/* Following helper functions make it possible to compare XXH128_hast_t values.
- * Since XXH128_hash_t is a structure, this capability is not offered by the language.
- * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
-
-/*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
- */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
-
-/*!
- * XXH128_cmp():
- *
- * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
- *
- * return: >0 if *h128_1  > *h128_2
- *         =0 if *h128_1 == *h128_2
- *         <0 if *h128_1  < *h128_2
- */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
-
-
-/*******   Canonical representation   *******/
-typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
-XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
-
-
-#endif  /* !XXH_NO_XXH3 */
-#endif  /* XXH_NO_LONG_LONG */
-
-/*!
- * @}
- */
-#endif /* XXHASH_H_5627135585666179 */
-
-
-
-#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
-#define XXHASH_H_STATIC_13879238742
-/* ****************************************************************************
- * This section contains declarations which are not guaranteed to remain stable.
- * They may change in future versions, becoming incompatible with a different
- * version of the library.
- * These declarations should only be used with static linking.
- * Never use them in association with dynamic linking!
- ***************************************************************************** */
-
-/*
- * These definitions are only present to allow static allocation
- * of XXH states, on stack or in a struct, for example.
- * Never **ever** access their members directly.
- */
-
-/*!
- * @internal
- * @brief Structure for XXH32 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
- * an opaque type. This allows fields to safely be changed.
- *
- * Typedef'd to @ref XXH32_state_t.
- * Do not access the members of this struct directly.
- * @see XXH64_state_s, XXH3_state_s
- */
-struct XXH32_state_s {
-   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
-   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
-   XXH32_hash_t v[4];         /*!< Accumulator lanes */
-   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
-   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
-   XXH32_hash_t reserved;     /*!< Reserved field. Do not read nor write to it. */
-};   /* typedef'd to XXH32_state_t */
-
-
-#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
-
-/*!
- * @internal
- * @brief Structure for XXH64 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
- * an opaque type. This allows fields to safely be changed.
- *
- * Typedef'd to @ref XXH64_state_t.
- * Do not access the members of this struct directly.
- * @see XXH32_state_s, XXH3_state_s
- */
-struct XXH64_state_s {
-   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
-   XXH64_hash_t v[4];         /*!< Accumulator lanes */
-   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
-   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
-   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways*/
-   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
-};   /* typedef'd to XXH64_state_t */
-
-
-#ifndef XXH_NO_XXH3
-
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
-#  include <stdalign.h>
-#  define XXH_ALIGN(n)      alignas(n)
-#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
-/* In C++ alignas() is a keyword */
-#  define XXH_ALIGN(n)      alignas(n)
-#elif defined(__GNUC__)
-#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
-#elif defined(_MSC_VER)
-#  define XXH_ALIGN(n)      __declspec(align(n))
-#else
-#  define XXH_ALIGN(n)   /* disabled */
-#endif
-
-/* Old GCC versions only accept the attribute after the type in structures. */
-#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
-    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
-    && defined(__GNUC__)
-#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
-#else
-#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
-#endif
-
-/*!
- * @brief The size of the internal XXH3 buffer.
- *
- * This is the optimal update size for incremental hashing.
- *
- * @see XXH3_64b_update(), XXH3_128b_update().
- */
-#define XXH3_INTERNALBUFFER_SIZE 256
-
-/*!
- * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
- *
- * This is the size used in @ref XXH3_kSecret and the seeded functions.
- *
- * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
- */
-#define XXH3_SECRET_DEFAULT_SIZE 192
-
-/*!
- * @internal
- * @brief Structure for XXH3 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
- * Otherwise it is an opaque type.
- * Never use this definition in combination with dynamic library.
- * This allows fields to safely be changed in the future.
- *
- * @note ** This structure has a strict alignment requirement of 64 bytes!! **
- * Do not allocate this with `malloc()` or `new`,
- * it will not be sufficiently aligned.
- * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
- *
- * Typedef'd to @ref XXH3_state_t.
- * Do never access the members of this struct directly.
- *
- * @see XXH3_INITSTATE() for stack initialization.
- * @see XXH3_createState(), XXH3_freeState().
- * @see XXH32_state_s, XXH64_state_s
- */
-struct XXH3_state_s {
-   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
-   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
-       /*!< Used to store a custom secret generated from a seed. */
-   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
-       /*!< The internal buffer. @see XXH32_state_s::mem32 */
-   XXH32_hash_t bufferedSize;
-       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
-   XXH32_hash_t useSeed;
-       /*!< Reserved field. Needed for padding on 64-bit. */
-   size_t nbStripesSoFar;
-       /*!< Number or stripes processed. */
-   XXH64_hash_t totalLen;
-       /*!< Total length hashed. 64-bit even on 32-bit targets. */
-   size_t nbStripesPerBlock;
-       /*!< Number of stripes per block. */
-   size_t secretLimit;
-       /*!< Size of @ref customSecret or @ref extSecret */
-   XXH64_hash_t seed;
-       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
-   XXH64_hash_t reserved64;
-       /*!< Reserved field. */
-   const unsigned char* extSecret;
-       /*!< Reference to an external secret for the _withSecret variants, NULL
-        *   for other variants. */
-   /* note: there may be some padding at the end due to alignment on 64 bytes */
-}; /* typedef'd to XXH3_state_t */
-
-#undef XXH_ALIGN_MEMBER
-
-/*!
- * @brief Initializes a stack-allocated `XXH3_state_s`.
- *
- * When the @ref XXH3_state_t structure is merely emplaced on stack,
- * it should be initialized with XXH3_INITSTATE() or a memset()
- * in case its first reset uses XXH3_NNbits_reset_withSeed().
- * This init can be omitted if the first reset uses default or _withSecret mode.
- * This operation isn't necessary when the state is created with XXH3_createState().
- * Note that this doesn't prepare the state for a streaming operation,
- * it's still necessary to use XXH3_NNbits_reset*() afterwards.
- */
-#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
-
-
-/* XXH128() :
- * simple alias to pre-selected XXH3_128bits variant
- */
-XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
-
-
-/* ===   Experimental API   === */
-/* Symbols defined below must be considered tied to a specific library version. */
-
-/*
- * XXH3_generateSecret():
- *
- * Derive a high-entropy secret from any user-defined content, named customSeed.
- * The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
- *
- * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @secretSize
- * into an already allocated buffer @secretBuffer.
- * @secretSize must be >= XXH3_SECRET_SIZE_MIN
- *
- * The generated secret can then be used with any `*_withSecret()` variant.
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
- * are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
- * _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so
- * XXH3_generateSecret() can be employed to ensure proper quality.
- *
- * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
- * The resulting `secret` will nonetheless provide all required qualities.
- *
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
-
-
-/*
- * XXH3_generateSecret_fromSeed():
- *
- * Generate the same secret as the _withSeed() variants.
- *
- * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
- * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
- *
- * The generated secret can be used in combination with
- *`*_withSecret()` and `_withSecretandSeed()` variants.
- * This generator is notably useful in combination with `_withSecretandSeed()`,
- * as a way to emulate a faster `_withSeed()` variant.
- */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
-
-/*
- * *_withSecretandSeed() :
- * These variants generate hash values using either
- * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
- *
- * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
- * `_withSeed()` has to generate the secret on the fly for "large" keys.
- * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
- * `_withSecret()` has to generate the masks on the fly for "small" keys,
- * which requires more instructions than _withSeed() variants.
- * Therefore, _withSecretandSeed variant combines the best of both worlds.
- *
- * When @secret has been generated by XXH3_generateSecret_fromSeed(),
- * this variant produces *exactly* the same results as `_withSeed()` variant,
- * hence offering only a pure speed benefit on "large" input,
- * by skipping the need to regenerate the secret for every large input.
- *
- * Another usage scenario is to hash the secret to a 64-bit hash value,
- * for example with XXH3_64bits(), which then becomes the seed,
- * and then employ both the seed and the secret in _withSecretandSeed().
- * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output,
- * via its impact to the seed.
- * This is not guaranteed when using the secret directly in "small data" scenarios,
- * because only portions of the secret are employed for small data.
- */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* data, size_t len,
-                              const void* secret, size_t secretSize,
-                              XXH64_hash_t seed);
-
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* data, size_t len,
-                               const void* secret, size_t secretSize,
-                               XXH64_hash_t seed64);
-
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                    const void* secret, size_t secretSize,
-                                    XXH64_hash_t seed64);
-
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                     const void* secret, size_t secretSize,
-                                     XXH64_hash_t seed64);
-
-
-#endif  /* XXH_NO_XXH3 */
-#endif  /* XXH_NO_LONG_LONG */
-#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
-#  define XXH_IMPLEMENTATION
-#endif
-
-#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
-
-
-/* ======================================================================== */
-/* ======================================================================== */
-/* ======================================================================== */
-
-
-/*-**********************************************************************
- * xxHash implementation
- *-**********************************************************************
- * xxHash's implementation used to be hosted inside xxhash.c.
- *
- * However, inlining requires implementation to be visible to the compiler,
- * hence be included alongside the header.
- * Previously, implementation was hosted inside xxhash.c,
- * which was then #included when inlining was activated.
- * This construction created issues with a few build and install systems,
- * as it required xxhash.c to be stored in /include directory.
- *
- * xxHash implementation is now directly integrated within xxhash.h.
- * As a consequence, xxhash.c is no longer needed in /include.
- *
- * xxhash.c is still available and is still useful.
- * In a "normal" setup, when xxhash is not inlined,
- * xxhash.h only exposes the prototypes and public symbols,
- * while xxhash.c can be built into an object file xxhash.o
- * which can then be linked into the final binary.
- ************************************************************************/
-
-#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
-   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
-#  define XXH_IMPLEM_13a8737387
-
-/* *************************************
-*  Tuning parameters
-***************************************/
-
-/*!
- * @defgroup tuning Tuning parameters
- * @{
- *
- * Various macros to control xxHash's behavior.
- */
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Define this to disable 64-bit code.
- *
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
- */
-#  define XXH_NO_LONG_LONG
-#  undef XXH_NO_LONG_LONG /* don't actually */
-/*!
- * @brief Controls how unaligned memory is accessed.
- *
- * By default, access to unaligned memory is controlled by `memcpy()`, which is
- * safe and portable.
- *
- * Unfortunately, on some target/compiler combinations, the generated assembly
- * is sub-optimal.
- *
- * The below switch allow selection of a different access method
- * in the search for improved performance.
- *
- * @par Possible options:
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
- *   @par
- *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
- *     eliminate the function call and treat it as an unaligned access.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
- *   @par
- *     Depends on compiler extensions and is therefore not portable.
- *     This method is safe _if_ your compiler supports it,
- *     and *generally* as fast or faster than `memcpy`.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
- *  @par
- *     Casts directly and dereferences. This method doesn't depend on the
- *     compiler, but it violates the C standard as it directly dereferences an
- *     unaligned pointer. It can generate buggy code on targets which do not
- *     support unaligned memory accesses, but in some circumstances, it's the
- *     only known way to get the most performance.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
- *  @par
- *     Also portable. This can generate the best code on old compilers which don't
- *     inline small `memcpy()` calls, and it might also be faster on big-endian
- *     systems which lack a native byteswap instruction. However, some compilers
- *     will emit literal byteshifts even if the target supports unaligned access.
- *  .
- *
- * @warning
- *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
- *   care, as what works on one compiler/platform/optimization level may cause
- *   another to read garbage data or even crash.
- *
- * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
- *
- * Prefer these methods in priority order (0 > 3 > 1 > 2)
- */
-#  define XXH_FORCE_MEMORY_ACCESS 0
-
-/*!
- * @def XXH_FORCE_ALIGN_CHECK
- * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
- * and XXH64() only).
- *
- * This is an important performance trick for architectures without decent
- * unaligned memory access performance.
- *
- * It checks for input alignment, and when conditions are met, uses a "fast
- * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
- * faster_ read speed.
- *
- * The check costs one initial branch per hash, which is generally negligible,
- * but not zero.
- *
- * Moreover, it's not useful to generate an additional code path if memory
- * access uses the same instruction for both aligned and unaligned
- * addresses (e.g. x86 and aarch64).
- *
- * In these cases, the alignment check can be removed by setting this macro to 0.
- * Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64 & arm64,
- * which are platforms known to offer good unaligned memory accesses performance.
- *
- * This option does not affect XXH3 (only XXH32 and XXH64).
- */
-#  define XXH_FORCE_ALIGN_CHECK 0
-
-/*!
- * @def XXH_NO_INLINE_HINTS
- * @brief When non-zero, sets all functions to `static`.
- *
- * By default, xxHash tries to force the compiler to inline almost all internal
- * functions.
- *
- * This can usually improve performance due to reduced jumping and improved
- * constant folding, but significantly increases the size of the binary which
- * might not be favorable.
- *
- * Additionally, sometimes the forced inlining can be detrimental to performance,
- * depending on the architecture.
- *
- * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
- * compiler full control on whether to inline or not.
- *
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
- */
-#  define XXH_NO_INLINE_HINTS 0
-
-/*!
- * @def XXH32_ENDJMP
- * @brief Whether to use a jump for `XXH32_finalize`.
- *
- * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
- * This is generally preferable for performance,
- * but depending on exact architecture, a jmp may be preferable.
- *
- * This setting is only possibly making a difference for very small inputs.
- */
-#  define XXH32_ENDJMP 0
-
-/*!
- * @internal
- * @brief Redefines old internal names.
- *
- * For compatibility with code that uses xxHash's internals before the names
- * were changed to improve namespacing. There is no other reason to use this.
- */
-#  define XXH_OLD_NAMES
-#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
-#endif /* XXH_DOXYGEN */
-/*!
- * @}
- */
-
-#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
-#  if !defined(__clang__) && \
-( \
-    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
-    ( \
-        defined(__GNUC__) && ( \
-            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-            ( \
-                defined(__mips__) && \
-                (__mips <= 5 || __mips_isa_rev < 6) && \
-                (!defined(__mips16) || defined(__mips_mips16e2)) \
-            ) \
-        ) \
-    ) \
-)
-#    define XXH_FORCE_MEMORY_ACCESS 1
-#  endif
-#endif
-
-#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
-   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
-#    define XXH_FORCE_ALIGN_CHECK 0
-#  else
-#    define XXH_FORCE_ALIGN_CHECK 1
-#  endif
-#endif
-
-#ifndef XXH_NO_INLINE_HINTS
-#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
-   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
-#    define XXH_NO_INLINE_HINTS 1
-#  else
-#    define XXH_NO_INLINE_HINTS 0
-#  endif
-#endif
-
-#ifndef XXH32_ENDJMP
-/* generally preferable for performance */
-#  define XXH32_ENDJMP 0
-#endif
-
-/*!
- * @defgroup impl Implementation
- * @{
- */
-
-
-/* *************************************
-*  Includes & Memory related functions
-***************************************/
-/* Modify the local functions below should you wish to use some other memory routines */
-/* for ZSTD_malloc(), ZSTD_free() */
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"  /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
-static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
-static void  XXH_free  (void* p)  { ZSTD_free(p); }
-static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
-
-
-/* *************************************
-*  Compiler Specific Options
-***************************************/
-#ifdef _MSC_VER /* Visual Studio warning fix */
-#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#endif
-
-#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
-#  if defined(__GNUC__) || defined(__clang__)
-#    define XXH_FORCE_INLINE static __attribute__((unused))
-#  else
-#    define XXH_FORCE_INLINE static
-#  endif
-#  define XXH_NO_INLINE static
-/* enable inlining hints */
-#elif defined(__GNUC__) || defined(__clang__)
-#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
-#  define XXH_NO_INLINE static __attribute__((noinline))
-#elif defined(_MSC_VER)  /* Visual Studio */
-#  define XXH_FORCE_INLINE static __forceinline
-#  define XXH_NO_INLINE static __declspec(noinline)
-#elif defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
-#  define XXH_FORCE_INLINE static inline
-#  define XXH_NO_INLINE static
-#else
-#  define XXH_FORCE_INLINE static
-#  define XXH_NO_INLINE static
-#endif
-
-
-
-/* *************************************
-*  Debug
-***************************************/
-/*!
- * @ingroup tuning
- * @def XXH_DEBUGLEVEL
- * @brief Sets the debugging level.
- *
- * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
- * compiler's command line options. The value must be a number.
- */
-#ifndef XXH_DEBUGLEVEL
-#  ifdef DEBUGLEVEL /* backwards compat */
-#    define XXH_DEBUGLEVEL DEBUGLEVEL
-#  else
-#    define XXH_DEBUGLEVEL 0
-#  endif
-#endif
-
-#if (XXH_DEBUGLEVEL>=1)
-#  include <assert.h>   /* note: can still be disabled with NDEBUG */
-#  define XXH_ASSERT(c)   assert(c)
-#else
-#  define XXH_ASSERT(c)   ((void)0)
-#endif
-
-/* note: use after variable declarations */
-#ifndef XXH_STATIC_ASSERT
-#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
-#    include <assert.h>
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
-#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
-#  else
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
-#  endif
-#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
-#endif
-
-/*!
- * @internal
- * @def XXH_COMPILER_GUARD(var)
- * @brief Used to prevent unwanted optimizations for @p var.
- *
- * It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (eg eax, ebx, ecx
- * on x86) and marks it as modified.
- *
- * This is used in a few places to avoid unwanted autovectorization (e.g.
- * XXH32_round()). All vectorization we want is explicit via intrinsics,
- * and _usually_ isn't wanted elsewhere.
- *
- * We also use it to prevent unwanted constant folding for AArch64 in
- * XXH3_initCustomSecret_scalar().
- */
-#if defined(__GNUC__) || defined(__clang__)
-#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
-#else
-#  define XXH_COMPILER_GUARD(var) ((void)0)
-#endif
-
-/* *************************************
-*  Basic Types
-***************************************/
-#if !defined (__VMS) \
- && (defined (__cplusplus) \
- || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
-  typedef uint8_t xxh_u8;
-#else
-  typedef unsigned char xxh_u8;
-#endif
-typedef XXH32_hash_t xxh_u32;
-
-#ifdef XXH_OLD_NAMES
-#  define BYTE xxh_u8
-#  define U8   xxh_u8
-#  define U32  xxh_u32
-#endif
-
-/* ***   Memory access   *** */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_read32(const void* ptr)
- * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit native endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32(const void* ptr)
- * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readBE32(const void* ptr)
- * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit big endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
- * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
- * always @ref XXH_alignment::XXH_unaligned.
- *
- * @param ptr The pointer to read from.
- * @param align Whether @p ptr is aligned.
- * @pre
- *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
- *   aligned.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-/*
- * Manual byteshift. Best for old compilers which don't inline memcpy.
- * We actually directly use XXH_readLE32 and XXH_readBE32.
- */
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
-
-/*
- * Force direct memory access. Only works on CPU which support unaligned memory
- * access in hardware.
- */
-static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
-
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
-
-/*
- * __pack instructions are safer but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
- */
-#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
-#endif
-static xxh_u32 XXH_read32(const void* ptr)
-{
-    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
-    return ((const xxh_unalign*)ptr)->u32;
-}
-
-#else
-
-/*
- * Portable and safe solution. Generally efficient.
- * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u32 XXH_read32(const void* memPtr)
-{
-    xxh_u32 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-
-/* ***   Endianness   *** */
-
-/*!
- * @ingroup tuning
- * @def XXH_CPU_LITTLE_ENDIAN
- * @brief Whether the target is little endian.
- *
- * Defined to 1 if the target is little endian, or 0 if it is big endian.
- * It can be defined externally, for example on the compiler command line.
- *
- * If it is not defined,
- * a runtime check (which is usually constant folded) is used instead.
- *
- * @note
- *   This is not necessarily defined to an integer constant.
- *
- * @see XXH_isLittleEndian() for the runtime check.
- */
-#ifndef XXH_CPU_LITTLE_ENDIAN
-/*
- * Try to detect endianness automatically, to avoid the nonstandard behavior
- * in `XXH_isLittleEndian()`
- */
-#  if defined(_WIN32) /* Windows is always little endian */ \
-     || defined(__LITTLE_ENDIAN__) \
-     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#    define XXH_CPU_LITTLE_ENDIAN 1
-#  elif defined(__BIG_ENDIAN__) \
-     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#    define XXH_CPU_LITTLE_ENDIAN 0
-#  else
-/*!
- * @internal
- * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
- *
- * Most compilers will constant fold this.
- */
-static int XXH_isLittleEndian(void)
-{
-    /*
-     * Portable and well-defined behavior.
-     * Don't use static: it is detrimental to performance.
-     */
-    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
-    return one.c[0];
-}
-#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
-#  endif
-#endif
-
-
-
-
-/* ****************************************
-*  Compiler-specific Functions and Macros
-******************************************/
-#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-#ifdef __has_builtin
-#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
-#else
-#  define XXH_HAS_BUILTIN(x) 0
-#endif
-
-/*!
- * @internal
- * @def XXH_rotl32(x,r)
- * @brief 32-bit rotate left.
- *
- * @param x The 32-bit integer to be rotated.
- * @param r The number of bits to rotate.
- * @pre
- *   @p r > 0 && @p r < 32
- * @note
- *   @p x and @p r may be evaluated multiple times.
- * @return The rotated result.
- */
-#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
-                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
-#  define XXH_rotl32 __builtin_rotateleft32
-#  define XXH_rotl64 __builtin_rotateleft64
-/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
-#elif defined(_MSC_VER)
-#  define XXH_rotl32(x,r) _rotl(x,r)
-#  define XXH_rotl64(x,r) _rotl64(x,r)
-#else
-#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
-#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
-#endif
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_swap32(xxh_u32 x)
- * @brief A 32-bit byteswap.
- *
- * @param x The 32-bit integer to byteswap.
- * @return @p x, byteswapped.
- */
-#if defined(_MSC_VER)     /* Visual Studio */
-#  define XXH_swap32 _byteswap_ulong
-#elif XXH_GCC_VERSION >= 403
-#  define XXH_swap32 __builtin_bswap32
-#else
-static xxh_u32 XXH_swap32 (xxh_u32 x)
-{
-    return  ((x << 24) & 0xff000000 ) |
-            ((x <<  8) & 0x00ff0000 ) |
-            ((x >>  8) & 0x0000ff00 ) |
-            ((x >> 24) & 0x000000ff );
-}
-#endif
-
-
-/* ***************************
-*  Memory reads
-*****************************/
-
-/*!
- * @internal
- * @brief Enum to indicate whether a pointer is aligned.
- */
-typedef enum {
-    XXH_aligned,  /*!< Aligned */
-    XXH_unaligned /*!< Possibly unaligned */
-} XXH_alignment;
-
-/*
- * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
- *
- * This is ideal for older compilers which don't inline memcpy.
- */
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-
-XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[0]
-         | ((xxh_u32)bytePtr[1] << 8)
-         | ((xxh_u32)bytePtr[2] << 16)
-         | ((xxh_u32)bytePtr[3] << 24);
-}
-
-XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[3]
-         | ((xxh_u32)bytePtr[2] << 8)
-         | ((xxh_u32)bytePtr[1] << 16)
-         | ((xxh_u32)bytePtr[0] << 24);
-}
-
-#else
-XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
-}
-
-static xxh_u32 XXH_readBE32(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
-}
-#endif
-
-XXH_FORCE_INLINE xxh_u32
-XXH_readLE32_align(const void* ptr, XXH_alignment align)
-{
-    if (align==XXH_unaligned) {
-        return XXH_readLE32(ptr);
-    } else {
-        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
-    }
-}
-
-
-/* *************************************
-*  Misc
-***************************************/
-/*! @ingroup public */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
-
-
-/* *******************************************************************
-*  32-bit hash functions
-*********************************************************************/
-/*!
- * @}
- * @defgroup xxh32_impl XXH32 implementation
- * @ingroup impl
- * @{
- */
- /* #define instead of static const, to be used as initializers */
-#define XXH_PRIME32_1  0x9E3779B1U  /*!< 0b10011110001101110111100110110001 */
-#define XXH_PRIME32_2  0x85EBCA77U  /*!< 0b10000101111010111100101001110111 */
-#define XXH_PRIME32_3  0xC2B2AE3DU  /*!< 0b11000010101100101010111000111101 */
-#define XXH_PRIME32_4  0x27D4EB2FU  /*!< 0b00100111110101001110101100101111 */
-#define XXH_PRIME32_5  0x165667B1U  /*!< 0b00010110010101100110011110110001 */
-
-#ifdef XXH_OLD_NAMES
-#  define PRIME32_1 XXH_PRIME32_1
-#  define PRIME32_2 XXH_PRIME32_2
-#  define PRIME32_3 XXH_PRIME32_3
-#  define PRIME32_4 XXH_PRIME32_4
-#  define PRIME32_5 XXH_PRIME32_5
-#endif
-
-/*!
- * @internal
- * @brief Normal stripe processing routine.
- *
- * This shuffles the bits so that any bit from @p input impacts several bits in
- * @p acc.
- *
- * @param acc The accumulator lane.
- * @param input The stripe of input to mix.
- * @return The mixed accumulator lane.
- */
-static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
-{
-    acc += input * XXH_PRIME32_2;
-    acc  = XXH_rotl32(acc, 13);
-    acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
-    /*
-     * UGLY HACK:
-     * A compiler fence is the only thing that prevents GCC and Clang from
-     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
-     * reason) without globally disabling SSE4.1.
-     *
-     * The reason we want to avoid vectorization is because despite working on
-     * 4 integers at a time, there are multiple factors slowing XXH32 down on
-     * SSE4:
-     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
-     *   newer chips!) making it slightly slower to multiply four integers at
-     *   once compared to four integers independently. Even when pmulld was
-     *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
-     *   just to multiply unless doing a long operation.
-     *
-     * - Four instructions are required to rotate,
-     *      movqda tmp,  v // not required with VEX encoding
-     *      pslld  tmp, 13 // tmp <<= 13
-     *      psrld  v,   19 // x >>= 19
-     *      por    v,  tmp // x |= tmp
-     *   compared to one for scalar:
-     *      roll   v, 13    // reliably fast across the board
-     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
-     *
-     * - Instruction level parallelism is actually more beneficial here because
-     *   the SIMD actually serializes this operation: While v1 is rotating, v2
-     *   can load data, while v3 can multiply. SSE forces them to operate
-     *   together.
-     *
-     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
-     * and it is pointless writing a NEON implementation that is basically the
-     * same speed as scalar for XXH32.
-     */
-    XXH_COMPILER_GUARD(acc);
-#endif
-    return acc;
-}
-
-/*!
- * @internal
- * @brief Mixes all bits to finalize the hash.
- *
- * The final mix ensures that all input bits have a chance to impact any bit in
- * the output digest, resulting in an unbiased distribution.
- *
- * @param h32 The hash to avalanche.
- * @return The avalanched hash.
- */
-static xxh_u32 XXH32_avalanche(xxh_u32 h32)
-{
-    h32 ^= h32 >> 15;
-    h32 *= XXH_PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= XXH_PRIME32_3;
-    h32 ^= h32 >> 16;
-    return(h32);
-}
-
-#define XXH_get32bits(p) XXH_readLE32_align(p, align)
-
-/*!
- * @internal
- * @brief Processes the last 0-15 bytes of @p ptr.
- *
- * There may be up to 15 bytes remaining to consume from the input.
- * This final stage will digest them to ensure that all input bytes are present
- * in the final mix.
- *
- * @param h32 The hash to finalize.
- * @param ptr The pointer to the remaining input.
- * @param len The remaining length, modulo 16.
- * @param align Whether @p ptr is aligned.
- * @return The finalized hash.
- */
-static xxh_u32
-XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
-{
-#define XXH_PROCESS1 do {                           \
-    h32 += (*ptr++) * XXH_PRIME32_5;                \
-    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
-} while (0)
-
-#define XXH_PROCESS4 do {                           \
-    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
-    ptr += 4;                                   \
-    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
-} while (0)
-
-    if (ptr==NULL) XXH_ASSERT(len == 0);
-
-    /* Compact rerolled version; generally faster */
-    if (!XXH32_ENDJMP) {
-        len &= 15;
-        while (len >= 4) {
-            XXH_PROCESS4;
-            len -= 4;
-        }
-        while (len > 0) {
-            XXH_PROCESS1;
-            --len;
-        }
-        return XXH32_avalanche(h32);
-    } else {
-         switch(len&15) /* or switch(bEnd - p) */ {
-           case 12:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 8:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 4:       XXH_PROCESS4;
-                         return XXH32_avalanche(h32);
-
-           case 13:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 9:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 5:       XXH_PROCESS4;
-                         XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
-
-           case 14:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 10:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 6:       XXH_PROCESS4;
-                         XXH_PROCESS1;
-                         XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
-
-           case 15:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 11:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 7:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
-           case 3:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 2:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 1:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 0:       return XXH32_avalanche(h32);
-        }
-        XXH_ASSERT(0);
-        return h32;   /* reaching this point is deemed impossible */
-    }
-}
-
-#ifdef XXH_OLD_NAMES
-#  define PROCESS1 XXH_PROCESS1
-#  define PROCESS4 XXH_PROCESS4
-#else
-#  undef XXH_PROCESS1
-#  undef XXH_PROCESS4
-#endif
-
-/*!
- * @internal
- * @brief The implementation for @ref XXH32().
- *
- * @param input , len , seed Directly passed from @ref XXH32().
- * @param align Whether @p input is aligned.
- * @return The calculated hash.
- */
-XXH_FORCE_INLINE xxh_u32
-XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
-{
-    xxh_u32 h32;
-
-    if (input==NULL) XXH_ASSERT(len == 0);
-
-    if (len>=16) {
-        const xxh_u8* const bEnd = input + len;
-        const xxh_u8* const limit = bEnd - 15;
-        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
-        xxh_u32 v2 = seed + XXH_PRIME32_2;
-        xxh_u32 v3 = seed + 0;
-        xxh_u32 v4 = seed - XXH_PRIME32_1;
-
-        do {
-            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
-            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
-            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
-            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
-        } while (input < limit);
-
-        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
-            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
-    } else {
-        h32  = seed + XXH_PRIME32_5;
-    }
-
-    h32 += (xxh_u32)len;
-
-    return XXH32_finalize(h32, input, len&15, align);
-}
-
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
-{
-#if 0
-    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
-    XXH32_state_t state;
-    XXH32_reset(&state, seed);
-    XXH32_update(&state, (const xxh_u8*)input, len);
-    return XXH32_digest(&state);
-#else
-    if (XXH_FORCE_ALIGN_CHECK) {
-        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
-            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
-    }   }
-
-    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
-#endif
-}
-
-
-
-/*******   Hash streaming   *******/
-/*!
- * @ingroup xxh32_family
- */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
-{
-    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
-}
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
-{
-    XXH_free(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
-{
-    XXH_memcpy(dstState, srcState, sizeof(*dstState));
-}
-
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
-{
-    XXH_ASSERT(statePtr != NULL);
-    memset(statePtr, 0, sizeof(*statePtr));
-    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
-    statePtr->v[1] = seed + XXH_PRIME32_2;
-    statePtr->v[2] = seed + 0;
-    statePtr->v[3] = seed - XXH_PRIME32_1;
-    return XXH_OK;
-}
-
-
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH32_update(XXH32_state_t* state, const void* input, size_t len)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    {   const xxh_u8* p = (const xxh_u8*)input;
-        const xxh_u8* const bEnd = p + len;
-
-        state->total_len_32 += (XXH32_hash_t)len;
-        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
-
-        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
-            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
-            state->memsize += (XXH32_hash_t)len;
-            return XXH_OK;
-        }
-
-        if (state->memsize) {   /* some data left from previous update */
-            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
-            {   const xxh_u32* p32 = state->mem32;
-                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
-                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
-                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
-                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
-            }
-            p += 16-state->memsize;
-            state->memsize = 0;
-        }
-
-        if (p <= bEnd-16) {
-            const xxh_u8* const limit = bEnd - 16;
-
-            do {
-                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
-                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
-                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
-                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
-            } while (p<=limit);
-
-        }
-
-        if (p < bEnd) {
-            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
-            state->memsize = (unsigned)(bEnd-p);
-        }
-    }
-
-    return XXH_OK;
-}
-
-
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
-{
-    xxh_u32 h32;
-
-    if (state->large_len) {
-        h32 = XXH_rotl32(state->v[0], 1)
-            + XXH_rotl32(state->v[1], 7)
-            + XXH_rotl32(state->v[2], 12)
-            + XXH_rotl32(state->v[3], 18);
-    } else {
-        h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
-    }
-
-    h32 += state->total_len_32;
-
-    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
-}
-
-
-/*******   Canonical representation   *******/
-
-/*!
- * @ingroup xxh32_family
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- *
- * The canonical representation uses big endian convention, the same convention
- * as human-readable numbers (large digits first).
- *
- * This way, hash values can be written into a file or buffer, remaining
- * comparable across different systems.
- *
- * The following functions allow transformation of hash values to and from their
- * canonical format.
- */
-XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
-{
-    /* XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); */
-    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
-    XXH_memcpy(dst, &hash, sizeof(*dst));
-}
-/*! @ingroup xxh32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
-{
-    return XXH_readBE32(src);
-}
-
-
-#ifndef XXH_NO_LONG_LONG
-
-/* *******************************************************************
-*  64-bit hash functions
-*********************************************************************/
-/*!
- * @}
- * @ingroup impl
- * @{
- */
-/*******   Memory access   *******/
-
-typedef XXH64_hash_t xxh_u64;
-
-#ifdef XXH_OLD_NAMES
-#  define U64 xxh_u64
-#endif
-
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-/*
- * Manual byteshift. Best for old compilers which don't inline memcpy.
- * We actually directly use XXH_readLE64 and XXH_readBE64.
- */
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
-
-/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
-static xxh_u64 XXH_read64(const void* memPtr)
-{
-    return *(const xxh_u64*) memPtr;
-}
-
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
-
-/*
- * __pack instructions are safer, but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
- */
-#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
-#endif
-static xxh_u64 XXH_read64(const void* ptr)
-{
-    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
-    return ((const xxh_unalign64*)ptr)->u64;
-}
-
-#else
-
-/*
- * Portable and safe solution. Generally efficient.
- * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u64 XXH_read64(const void* memPtr)
-{
-    xxh_u64 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-#if defined(_MSC_VER)     /* Visual Studio */
-#  define XXH_swap64 _byteswap_uint64
-#elif XXH_GCC_VERSION >= 403
-#  define XXH_swap64 __builtin_bswap64
-#else
-static xxh_u64 XXH_swap64(xxh_u64 x)
-{
-    return  ((x << 56) & 0xff00000000000000ULL) |
-            ((x << 40) & 0x00ff000000000000ULL) |
-            ((x << 24) & 0x0000ff0000000000ULL) |
-            ((x << 8)  & 0x000000ff00000000ULL) |
-            ((x >> 8)  & 0x00000000ff000000ULL) |
-            ((x >> 24) & 0x0000000000ff0000ULL) |
-            ((x >> 40) & 0x000000000000ff00ULL) |
-            ((x >> 56) & 0x00000000000000ffULL);
-}
-#endif
-
-
-/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[0]
-         | ((xxh_u64)bytePtr[1] << 8)
-         | ((xxh_u64)bytePtr[2] << 16)
-         | ((xxh_u64)bytePtr[3] << 24)
-         | ((xxh_u64)bytePtr[4] << 32)
-         | ((xxh_u64)bytePtr[5] << 40)
-         | ((xxh_u64)bytePtr[6] << 48)
-         | ((xxh_u64)bytePtr[7] << 56);
-}
-
-XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[7]
-         | ((xxh_u64)bytePtr[6] << 8)
-         | ((xxh_u64)bytePtr[5] << 16)
-         | ((xxh_u64)bytePtr[4] << 24)
-         | ((xxh_u64)bytePtr[3] << 32)
-         | ((xxh_u64)bytePtr[2] << 40)
-         | ((xxh_u64)bytePtr[1] << 48)
-         | ((xxh_u64)bytePtr[0] << 56);
-}
-
-#else
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
-}
-
-static xxh_u64 XXH_readBE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
-}
-#endif
-
-XXH_FORCE_INLINE xxh_u64
-XXH_readLE64_align(const void* ptr, XXH_alignment align)
-{
-    if (align==XXH_unaligned)
-        return XXH_readLE64(ptr);
-    else
-        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
-}
-
-
-/*******   xxh64   *******/
-/*!
- * @}
- * @defgroup xxh64_impl XXH64 implementation
- * @ingroup impl
- * @{
- */
-/* #define rather that static const, to be used as initializers */
-#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
-#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
-#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
-#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
-#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
-
-#ifdef XXH_OLD_NAMES
-#  define PRIME64_1 XXH_PRIME64_1
-#  define PRIME64_2 XXH_PRIME64_2
-#  define PRIME64_3 XXH_PRIME64_3
-#  define PRIME64_4 XXH_PRIME64_4
-#  define PRIME64_5 XXH_PRIME64_5
-#endif
-
-static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
-{
-    acc += input * XXH_PRIME64_2;
-    acc  = XXH_rotl64(acc, 31);
-    acc *= XXH_PRIME64_1;
-    return acc;
-}
-
-static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
-{
-    val  = XXH64_round(0, val);
-    acc ^= val;
-    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
-    return acc;
-}
-
-static xxh_u64 XXH64_avalanche(xxh_u64 h64)
-{
-    h64 ^= h64 >> 33;
-    h64 *= XXH_PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= XXH_PRIME64_3;
-    h64 ^= h64 >> 32;
-    return h64;
-}
-
-
-#define XXH_get64bits(p) XXH_readLE64_align(p, align)
-
-static xxh_u64
-XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
-{
-    if (ptr==NULL) XXH_ASSERT(len == 0);
-    len &= 31;
-    while (len >= 8) {
-        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
-        ptr += 8;
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
-        len -= 8;
-    }
-    if (len >= 4) {
-        h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
-        ptr += 4;
-        h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
-        len -= 4;
-    }
-    while (len > 0) {
-        h64 ^= (*ptr++) * XXH_PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
-        --len;
-    }
-    return  XXH64_avalanche(h64);
-}
-
-#ifdef XXH_OLD_NAMES
-#  define PROCESS1_64 XXH_PROCESS1_64
-#  define PROCESS4_64 XXH_PROCESS4_64
-#  define PROCESS8_64 XXH_PROCESS8_64
-#else
-#  undef XXH_PROCESS1_64
-#  undef XXH_PROCESS4_64
-#  undef XXH_PROCESS8_64
-#endif
-
-XXH_FORCE_INLINE xxh_u64
-XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
-{
-    xxh_u64 h64;
-    if (input==NULL) XXH_ASSERT(len == 0);
-
-    if (len>=32) {
-        const xxh_u8* const bEnd = input + len;
-        const xxh_u8* const limit = bEnd - 31;
-        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
-        xxh_u64 v2 = seed + XXH_PRIME64_2;
-        xxh_u64 v3 = seed + 0;
-        xxh_u64 v4 = seed - XXH_PRIME64_1;
-
-        do {
-            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
-            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
-            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
-            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
-        } while (input<limit);
-
-        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-        h64 = XXH64_mergeRound(h64, v1);
-        h64 = XXH64_mergeRound(h64, v2);
-        h64 = XXH64_mergeRound(h64, v3);
-        h64 = XXH64_mergeRound(h64, v4);
-
-    } else {
-        h64  = seed + XXH_PRIME64_5;
-    }
-
-    h64 += (xxh_u64) len;
-
-    return XXH64_finalize(h64, input, len, align);
-}
-
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
-{
-#if 0
-    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
-    XXH64_state_t state;
-    XXH64_reset(&state, seed);
-    XXH64_update(&state, (const xxh_u8*)input, len);
-    return XXH64_digest(&state);
-#else
-    if (XXH_FORCE_ALIGN_CHECK) {
-        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
-            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
-    }   }
-
-    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
-
-#endif
-}
-
-/*******   Hash Streaming   *******/
-
-/*! @ingroup xxh64_family*/
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
-{
-    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
-}
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
-{
-    XXH_free(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
-{
-    XXH_memcpy(dstState, srcState, sizeof(*dstState));
-}
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(statePtr != NULL);
-    memset(statePtr, 0, sizeof(*statePtr));
-    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
-    statePtr->v[1] = seed + XXH_PRIME64_2;
-    statePtr->v[2] = seed + 0;
-    statePtr->v[3] = seed - XXH_PRIME64_1;
-    return XXH_OK;
-}
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH64_state_t* state, const void* input, size_t len)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    {   const xxh_u8* p = (const xxh_u8*)input;
-        const xxh_u8* const bEnd = p + len;
-
-        state->total_len += len;
-
-        if (state->memsize + len < 32) {  /* fill in tmp buffer */
-            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
-            state->memsize += (xxh_u32)len;
-            return XXH_OK;
-        }
-
-        if (state->memsize) {   /* tmp buffer is full */
-            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
-            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
-            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
-            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
-            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
-            p += 32 - state->memsize;
-            state->memsize = 0;
-        }
-
-        if (p+32 <= bEnd) {
-            const xxh_u8* const limit = bEnd - 32;
-
-            do {
-                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
-                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
-                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
-                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
-            } while (p<=limit);
-
-        }
-
-        if (p < bEnd) {
-            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
-            state->memsize = (unsigned)(bEnd-p);
-        }
-    }
-
-    return XXH_OK;
-}
-
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
-{
-    xxh_u64 h64;
-
-    if (state->total_len >= 32) {
-        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
-        h64 = XXH64_mergeRound(h64, state->v[0]);
-        h64 = XXH64_mergeRound(h64, state->v[1]);
-        h64 = XXH64_mergeRound(h64, state->v[2]);
-        h64 = XXH64_mergeRound(h64, state->v[3]);
-    } else {
-        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
-    }
-
-    h64 += (xxh_u64) state->total_len;
-
-    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
-}
-
-
-/******* Canonical representation   *******/
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
-{
-    /* XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); */
-    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
-    XXH_memcpy(dst, &hash, sizeof(*dst));
-}
-
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
-{
-    return XXH_readBE64(src);
-}
-
-#ifndef XXH_NO_XXH3
-
-/* *********************************************************************
-*  XXH3
-*  New generation hash designed for speed on small keys and vectorization
-************************************************************************ */
-/*!
- * @}
- * @defgroup xxh3_impl XXH3 implementation
- * @ingroup impl
- * @{
- */
-
-/* ===   Compiler specifics   === */
-
-#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-#  define XXH_RESTRICT /* disable */
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
-#  define XXH_RESTRICT   restrict
-#else
-/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
-#  define XXH_RESTRICT   /* disable */
-#endif
-
-#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
-  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
-  || defined(__clang__)
-#    define XXH_likely(x) __builtin_expect(x, 1)
-#    define XXH_unlikely(x) __builtin_expect(x, 0)
-#else
-#    define XXH_likely(x) (x)
-#    define XXH_unlikely(x) (x)
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
-   || defined(__aarch64__)  || defined(_M_ARM) \
-   || defined(_M_ARM64)     || defined(_M_ARM64EC)
-#    define inline __inline__  /* circumvent a clang bug */
-#    include <arm_neon.h>
-#    undef inline
-#  elif defined(__AVX2__)
-#    include <immintrin.h>
-#  elif defined(__SSE2__)
-#    include <emmintrin.h>
-#  endif
-#endif
-
-#if defined(_MSC_VER)
-#  include <intrin.h>
-#endif
-
-/*
- * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
- * remaining a true 64-bit/128-bit hash function.
- *
- * This is done by prioritizing a subset of 64-bit operations that can be
- * emulated without too many steps on the average 32-bit machine.
- *
- * For example, these two lines seem similar, and run equally fast on 64-bit:
- *
- *   xxh_u64 x;
- *   x ^= (x >> 47); // good
- *   x ^= (x >> 13); // bad
- *
- * However, to a 32-bit machine, there is a major difference.
- *
- * x ^= (x >> 47) looks like this:
- *
- *   x.lo ^= (x.hi >> (47 - 32));
- *
- * while x ^= (x >> 13) looks like this:
- *
- *   // note: funnel shifts are not usually cheap.
- *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
- *   x.hi ^= (x.hi >> 13);
- *
- * The first one is significantly faster than the second, simply because the
- * shift is larger than 32. This means:
- *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
- *    32 bits in the shift.
- *  - The shift result will always fit in the lower 32 bits, and therefore,
- *    we can ignore the upper 32 bits in the xor.
- *
- * Thanks to this optimization, XXH3 only requires these features to be efficient:
- *
- *  - Usable unaligned access
- *  - A 32-bit or 64-bit ALU
- *      - If 32-bit, a decent ADC instruction
- *  - A 32 or 64-bit multiply with a 64-bit result
- *  - For the 128-bit variant, a decent byteswap helps short inputs.
- *
- * The first two are already required by XXH32, and almost all 32-bit and 64-bit
- * platforms which can run XXH32 can run XXH3 efficiently.
- *
- * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
- * notable exception.
- *
- * First of all, Thumb-1 lacks support for the UMULL instruction which
- * performs the important long multiply. This means numerous __aeabi_lmul
- * calls.
- *
- * Second of all, the 8 functional registers are just not enough.
- * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
- * Lo registers, and this shuffling results in thousands more MOVs than A32.
- *
- * A32 and T32 don't have this limitation. They can access all 14 registers,
- * do a 32->64 multiply with UMULL, and the flexible operand allowing free
- * shifts is helpful, too.
- *
- * Therefore, we do a quick sanity check.
- *
- * If compiling Thumb-1 for a target which supports ARM instructions, we will
- * emit a warning, as it is not a "sane" platform to compile for.
- *
- * Usually, if this happens, it is because of an accident and you probably need
- * to specify -march, as you likely meant to compile for a newer architecture.
- *
- * Credit: large sections of the vectorial and asm source code paths
- *         have been contributed by @easyaspi314
- */
-#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
-#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
-#endif
-
-/* ==========================================
- * Vectorization detection
- * ========================================== */
-
-#ifdef XXH_DOXYGEN
-/*!
- * @ingroup tuning
- * @brief Overrides the vectorization implementation chosen for XXH3.
- *
- * Can be defined to 0 to disable SIMD or any of the values mentioned in
- * @ref XXH_VECTOR_TYPE.
- *
- * If this is not defined, it uses predefined macros to determine the best
- * implementation.
- */
-#  define XXH_VECTOR XXH_SCALAR
-/*!
- * @ingroup tuning
- * @brief Possible values for @ref XXH_VECTOR.
- *
- * Note that these are actually implemented as macros.
- *
- * If this is not defined, it is detected automatically.
- * @ref XXH_X86DISPATCH overrides this.
- */
-enum XXH_VECTOR_TYPE /* fake enum */ {
-    XXH_SCALAR = 0,  /*!< Portable scalar version */
-    XXH_SSE2   = 1,  /*!<
-                      * SSE2 for Pentium 4, Opteron, all x86_64.
-                      *
-                      * @note SSE2 is also guaranteed on Windows 10, macOS, and
-                      * Android x86.
-                      */
-    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
-    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
-    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
-    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
-};
-/*!
- * @ingroup tuning
- * @brief Selects the minimum alignment for XXH3's accumulators.
- *
- * When using SIMD, this should match the alignment reqired for said vector
- * type, so, for example, 32 for AVX2.
- *
- * Default: Auto detected.
- */
-#  define XXH_ACC_ALIGN 8
-#endif
-
-/* Actual definition */
-#ifndef XXH_DOXYGEN
-#  define XXH_SCALAR 0
-#  define XXH_SSE2   1
-#  define XXH_AVX2   2
-#  define XXH_AVX512 3
-#  define XXH_NEON   4
-#  define XXH_VSX    5
-#endif
-
-#ifndef XXH_VECTOR    /* can be defined on command line */
-#  if ( \
-        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
-     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
-   ) && ( \
-        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
-    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
-   )
-#    define XXH_VECTOR XXH_NEON
-#  elif defined(__AVX512F__)
-#    define XXH_VECTOR XXH_AVX512
-#  elif defined(__AVX2__)
-#    define XXH_VECTOR XXH_AVX2
-#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-#    define XXH_VECTOR XXH_SSE2
-#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
-     || (defined(__s390x__) && defined(__VEC__)) \
-     && defined(__GNUC__) /* TODO: IBM XL */
-#    define XXH_VECTOR XXH_VSX
-#  else
-#    define XXH_VECTOR XXH_SCALAR
-#  endif
-#endif
-
-/*
- * Controls the alignment of the accumulator,
- * for compatibility with aligned vector loads, which are usually faster.
- */
-#ifndef XXH_ACC_ALIGN
-#  if defined(XXH_X86DISPATCH)
-#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
-#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
-#     define XXH_ACC_ALIGN 8
-#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
-#     define XXH_ACC_ALIGN 32
-#  elif XXH_VECTOR == XXH_NEON  /* neon */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_VSX   /* vsx */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
-#     define XXH_ACC_ALIGN 64
-#  endif
-#endif
-
-#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
-    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
-#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
-#else
-#  define XXH_SEC_ALIGN 8
-#endif
-
-/*
- * UGLY HACK:
- * GCC usually generates the best code with -O3 for xxHash.
- *
- * However, when targeting AVX2, it is overzealous in its unrolling resulting
- * in code roughly 3/4 the speed of Clang.
- *
- * There are other issues, such as GCC splitting _mm256_loadu_si256 into
- * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
- * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
- *
- * That is why when compiling the AVX2 version, it is recommended to use either
- *   -O2 -mavx2 -march=haswell
- * or
- *   -O2 -mavx2 -mno-avx256-split-unaligned-load
- * for decent performance, or to use Clang instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC into
- * -O2, but the other one we can't control without "failed to inline always
- * inline function due to target mismatch" warnings.
- */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
-#  pragma GCC push_options
-#  pragma GCC optimize("-O2")
-#endif
-
-
-#if XXH_VECTOR == XXH_NEON
-/*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
- *
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
- *
- * To do the same operation, the 128-bit 'Q' register needs to be split into
- * two 64-bit 'D' registers, performing this operation::
- *
- *   [                a                 |                 b                ]
- *            |              '---------. .--------'                |
- *            |                         x                          |
- *            |              .---------' '--------.                |
- *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
- *
- * Due to significant changes in aarch64, the fastest method for aarch64 is
- * completely different than the fastest method for ARMv7-A.
- *
- * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
- * D11 will modify the high half of Q5. This is similar to how modifying AH
- * will only affect bits 8-15 of AX on x86.
- *
- * VZIP takes two registers, and puts even lanes in one register and odd lanes
- * in the other.
- *
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
- *
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
- * lower and upper halves of the Q register to end up with the high and low
- * halves where we want - all in one instruction.
- *
- *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
- *
- * Unfortunately we need inline assembly for this: Instructions modifying two
- * registers at once is not possible in GCC or Clang's IR, and they have to
- * create a copy.
- *
- * aarch64 requires a different approach.
- *
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
- *
- * NEON was also affected by this.
- *
- * aarch64 cannot access the high bits of a Q-form register, and writes to a
- * D-form register zero the high bits, similar to how writes to W-form scalar
- * registers (or DWORD registers on x86_64) work.
- *
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
- *
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
- * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
- * operand.
- *
- * The equivalent of the VZIP.32 on the lower and upper halves would be this
- * mess:
- *
- *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
- *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
- *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
- *
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
- *
- *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
- *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
- *
- * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
- */
-
-/*!
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
- * {
- *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- *     outHi = (uint32x2_t)(in >> 32);
- *     in = UNDEFINED;
- * }
- */
-# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && (defined(__GNUC__) || defined(__clang__)) \
-   && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
-    do {                                                                                    \
-      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
-      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
-      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
-      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
-      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
-      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
-   } while (0)
-# else
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
-    do {                                                                                  \
-      (outLo) = vmovn_u64    (in);                                                        \
-      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
-    } while (0)
-# endif
-
-/*!
- * @ingroup tuning
- * @brief Controls the NEON to scalar ratio for XXH3
- *
- * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
- * 2 lanes on scalar by default.
- *
- * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
- * emulated 64-bit arithmetic is too slow.
- *
- * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
- *
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
- * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
- * you are only using 2/3 of the CPU bandwidth.
- *
- * This is even more noticable on the more advanced cores like the A76 which
- * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
- *
- * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
- * remaining lanes will use scalar instructions. This improves the bandwidth
- * and also gives the integer pipelines something to do besides twiddling loop
- * counters and pointers.
- *
- * This change benefits CPUs with large micro-op buffers without negatively affecting
- * other CPUs:
- *
- *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
- *  |:----------------------|:--------------------|----------:|-----------:|------:|
- *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
- *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
- *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
- *
- * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
- *
- * @see XXH3_accumulate_512_neon()
- */
-# ifndef XXH3_NEON_LANES
-#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
-   && !defined(__OPTIMIZE_SIZE__)
-#   define XXH3_NEON_LANES 6
-#  else
-#   define XXH3_NEON_LANES XXH_ACC_NB
-#  endif
-# endif
-#endif  /* XXH_VECTOR == XXH_NEON */
-
-/*
- * VSX and Z Vector helpers.
- *
- * This is very messy, and any pull requests to clean this up are welcome.
- *
- * There are a lot of problems with supporting VSX and s390x, due to
- * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
- */
-#if XXH_VECTOR == XXH_VSX
-#  if defined(__s390x__)
-#    include <s390intrin.h>
-#  else
-/* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for other purposes.
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
- * but it seems that, in some cases, it isn't.
- * Force the build macro to be defined, so that keywords are not altered.
- */
-#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
-#      define __APPLE_ALTIVEC__
-#    endif
-#    include <altivec.h>
-#  endif
-
-typedef __vector unsigned long long xxh_u64x2;
-typedef __vector unsigned char xxh_u8x16;
-typedef __vector unsigned xxh_u32x4;
-
-# ifndef XXH_VSX_BE
-#  if defined(__BIG_ENDIAN__) \
-  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#    define XXH_VSX_BE 1
-#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
-#    warning "-maltivec=be is not recommended. Please use native endianness."
-#    define XXH_VSX_BE 1
-#  else
-#    define XXH_VSX_BE 0
-#  endif
-# endif /* !defined(XXH_VSX_BE) */
-
-# if XXH_VSX_BE
-#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
-#    define XXH_vec_revb vec_revb
-#  else
-/*!
- * A polyfill for POWER9's vec_revb().
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
-{
-    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
-                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
-    return vec_perm(val, val, vByteSwap);
-}
-#  endif
-# endif /* XXH_VSX_BE */
-
-/*!
- * Performs an unaligned vector load and byte swaps it on big endian.
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
-{
-    xxh_u64x2 ret;
-    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
-# if XXH_VSX_BE
-    ret = XXH_vec_revb(ret);
-# endif
-    return ret;
-}
-
-/*
- * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
- *
- * These intrinsics weren't added until GCC 8, despite existing for a while,
- * and they are endian dependent. Also, their meaning swap depending on version.
- * */
-# if defined(__s390x__)
- /* s390x is always big endian, no issue on this platform */
-#  define XXH_vec_mulo vec_mulo
-#  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
-/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
-#  define XXH_vec_mulo __builtin_altivec_vmulouw
-#  define XXH_vec_mule __builtin_altivec_vmuleuw
-# else
-/* gcc needs inline assembly */
-/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-# endif /* XXH_vec_mulo, XXH_vec_mule */
-#endif /* XXH_VECTOR == XXH_VSX */
-
-
-/* prefetch
- * can be disabled, by declaring XXH_NO_PREFETCH build macro */
-#if defined(XXH_NO_PREFETCH)
-#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
-#else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
-#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
-#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
-#  else
-#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
-#  endif
-#endif  /* XXH_NO_PREFETCH */
-
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
-
-#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
-#  error "default keyset is not large enough"
-#endif
-
-/*! Pseudorandom secret taken directly from FARSH. */
-XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
-    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
-    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
-    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
-    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
-    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
-    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
-    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
-    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
-    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
-    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
-    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
-    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
-};
-
-
-#ifdef XXH_OLD_NAMES
-#  define kSecret XXH3_kSecret
-#endif
-
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Calculates a 32-bit to 64-bit long multiply.
- *
- * Implemented as a macro.
- *
- * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
- * need to (but it shouldn't need to anyways, it is about 7 instructions to do
- * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
- * use that instead of the normal method.
- *
- * If you are compiling for platforms like Thumb-1 and don't have a better option,
- * you may also want to write your own long multiply routine here.
- *
- * @param x, y Numbers to be multiplied
- * @return 64-bit product of the low 32 bits of @p x and @p y.
- */
-XXH_FORCE_INLINE xxh_u64
-XXH_mult32to64(xxh_u64 x, xxh_u64 y)
-{
-   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
-}
-#elif defined(_MSC_VER) && defined(_M_IX86)
-#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
-#else
-/*
- * Downcast + upcast is usually better than masking on older compilers like
- * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
- *
- * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
- * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
- */
-#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
-#endif
-
-/*!
- * @brief Calculates a 64->128-bit long multiply.
- *
- * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
- * version.
- *
- * @param lhs , rhs The 64-bit integers to be multiplied
- * @return The 128-bit result represented in an @ref XXH128_hash_t.
- */
-static XXH128_hash_t
-XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
-{
-    /*
-     * GCC/Clang __uint128_t method.
-     *
-     * On most 64-bit targets, GCC and Clang define a __uint128_t type.
-     * This is usually the best way as it usually uses a native long 64-bit
-     * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
-     *
-     * Usually.
-     *
-     * Despite being a 32-bit platform, Clang (and emscripten) define this type
-     * despite not having the arithmetic for it. This results in a laggy
-     * compiler builtin call which calculates a full 128-bit multiply.
-     * In that case it is best to use the portable one.
-     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
-     */
-#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
-    && defined(__SIZEOF_INT128__) \
-    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
-
-    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
-    XXH128_hash_t r128;
-    r128.low64  = (xxh_u64)(product);
-    r128.high64 = (xxh_u64)(product >> 64);
-    return r128;
-
-    /*
-     * MSVC for x64's _umul128 method.
-     *
-     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
-     *
-     * This compiles to single operand MUL on x64.
-     */
-#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
-
-#ifndef _MSC_VER
-#   pragma intrinsic(_umul128)
-#endif
-    xxh_u64 product_high;
-    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
-    XXH128_hash_t r128;
-    r128.low64  = product_low;
-    r128.high64 = product_high;
-    return r128;
-
-    /*
-     * MSVC for ARM64's __umulh method.
-     *
-     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
-     */
-#elif defined(_M_ARM64) || defined(_M_ARM64EC)
-
-#ifndef _MSC_VER
-#   pragma intrinsic(__umulh)
-#endif
-    XXH128_hash_t r128;
-    r128.low64  = lhs * rhs;
-    r128.high64 = __umulh(lhs, rhs);
-    return r128;
-
-#else
-    /*
-     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
-     *
-     * This is a fast and simple grade school multiply, which is shown below
-     * with base 10 arithmetic instead of base 0x100000000.
-     *
-     *           9 3 // D2 lhs = 93
-     *         x 7 5 // D2 rhs = 75
-     *     ----------
-     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
-     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
-     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
-     *     + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
-     *     ---------
-     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
-     *     + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
-     *     ---------
-     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
-     *
-     * The reasons for adding the products like this are:
-     *  1. It avoids manual carry tracking. Just like how
-     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
-     *     This avoids a lot of complexity.
-     *
-     *  2. It hints for, and on Clang, compiles to, the powerful UMAAL
-     *     instruction available in ARM's Digital Signal Processing extension
-     *     in 32-bit ARMv6 and later, which is shown below:
-     *
-     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
-     *         {
-     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
-     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
-     *             *RdHi = (xxh_u32)(product >> 32);
-     *         }
-     *
-     *     This instruction was designed for efficient long multiplication, and
-     *     allows this to be calculated in only 4 instructions at speeds
-     *     comparable to some 64-bit ALUs.
-     *
-     *  3. It isn't terrible on other platforms. Usually this will be a couple
-     *     of 32-bit ADD/ADCs.
-     */
-
-    /* First calculate all of the cross products. */
-    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
-    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
-    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
-    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);
-
-    /* Now add the products together. These will never overflow. */
-    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
-    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
-    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
-
-    XXH128_hash_t r128;
-    r128.low64  = lower;
-    r128.high64 = upper;
-    return r128;
-#endif
-}
-
-/*!
- * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
- *
- * The reason for the separate function is to prevent passing too many structs
- * around by value. This will hopefully inline the multiply, but we don't force it.
- *
- * @param lhs , rhs The 64-bit integers to multiply
- * @return The low 64 bits of the product XOR'd by the high 64 bits.
- * @see XXH_mult64to128()
- */
-static xxh_u64
-XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
-{
-    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
-    return product.low64 ^ product.high64;
-}
-
-/*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
-{
-    XXH_ASSERT(0 <= shift && shift < 64);
-    return v64 ^ (v64 >> shift);
-}
-
-/*
- * This is a fast avalanche stage,
- * suitable when input bits are already partially mixed
- */
-static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
-{
-    h64 = XXH_xorshift64(h64, 37);
-    h64 *= 0x165667919E3779F9ULL;
-    h64 = XXH_xorshift64(h64, 32);
-    return h64;
-}
-
-/*
- * This is a stronger avalanche,
- * inspired by Pelle Evensen's rrmxmx
- * preferable when input has not been previously mixed
- */
-static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
-{
-    /* this mix is inspired by Pelle Evensen's rrmxmx */
-    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
-    h64 *= 0x9FB21C651E98DF25ULL;
-    h64 ^= (h64 >> 35) + len ;
-    h64 *= 0x9FB21C651E98DF25ULL;
-    return XXH_xorshift64(h64, 28);
-}
-
-
-/* ==========================================
- * Short keys
- * ==========================================
- * One of the shortcomings of XXH32 and XXH64 was that their performance was
- * sub-optimal on short lengths. It used an iterative algorithm which strongly
- * favored lengths that were a multiple of 4 or 8.
- *
- * Instead of iterating over individual inputs, we use a set of single shot
- * functions which piece together a range of lengths and operate in constant time.
- *
- * Additionally, the number of multiplies has been significantly reduced. This
- * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
- *
- * Depending on the platform, this may or may not be faster than XXH32, but it
- * is almost guaranteed to be faster than XXH64.
- */
-
-/*
- * At very short lengths, there isn't enough input to fully hide secrets, or use
- * the entire secret.
- *
- * There is also only a limited amount of mixing we can do before significantly
- * impacting performance.
- *
- * Therefore, we use different sections of the secret and always mix two secret
- * samples with an XOR. This should have no effect on performance on the
- * seedless or withSeed variants because everything _should_ be constant folded
- * by modern compilers.
- *
- * The XOR mixing hides individual parts of the secret and increases entropy.
- *
- * This adds an extra layer of strength for custom secrets.
- */
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(secret != NULL);
-    /*
-     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
-     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
-     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
-     */
-    {   xxh_u8  const c1 = input[0];
-        xxh_u8  const c2 = input[len >> 1];
-        xxh_u8  const c3 = input[len - 1];
-        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
-                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);
-        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
-        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
-        return XXH64_avalanche(keyed);
-    }
-}
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
-    {   xxh_u32 const input1 = XXH_readLE32(input);
-        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
-        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
-        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
-        xxh_u64 const keyed = input64 ^ bitflip;
-        return XXH3_rrmxmx(keyed, len);
-    }
-}
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
-        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
-        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;
-        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
-        xxh_u64 const acc = len
-                          + XXH_swap64(input_lo) + input_hi
-                          + XXH3_mul128_fold64(input_lo, input_hi);
-        return XXH3_avalanche(acc);
-    }
-}
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
-        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
-        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
-        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
-    }
-}
-
-/*
- * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
- * multiplication by zero, affecting hashes of lengths 17 to 240.
- *
- * However, they are very unlikely.
- *
- * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
- * unseeded non-cryptographic hashes, it does not attempt to defend itself
- * against specially crafted inputs, only random inputs.
- *
- * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
- * cancelling out the secret is taken an arbitrary number of times (addressed
- * in XXH3_accumulate_512), this collision is very unlikely with random inputs
- * and/or proper seeding:
- *
- * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
- * function that is only called up to 16 times per hash with up to 240 bytes of
- * input.
- *
- * This is not too bad for a non-cryptographic hash function, especially with
- * only 64 bit outputs.
- *
- * The 128-bit variant (which trades some speed for strength) is NOT affected
- * by this, although it is always a good idea to use a proper seed if you care
- * about strength.
- */
-XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
-                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
-{
-#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
-  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
-    /*
-     * UGLY HACK:
-     * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
-     * slower code.
-     *
-     * By forcing seed64 into a register, we disrupt the cost model and
-     * cause it to scalarize. See `XXH32_round()`
-     *
-     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
-     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
-     * GCC 9.2, despite both emitting scalar code.
-     *
-     * GCC generates much better scalar code than Clang for the rest of XXH3,
-     * which is why finding a more optimal codepath is an interest.
-     */
-    XXH_COMPILER_GUARD(seed64);
-#endif
-    {   xxh_u64 const input_lo = XXH_readLE64(input);
-        xxh_u64 const input_hi = XXH_readLE64(input+8);
-        return XXH3_mul128_fold64(
-            input_lo ^ (XXH_readLE64(secret)   + seed64),
-            input_hi ^ (XXH_readLE64(secret+8) - seed64)
-        );
-    }
-}
-
-/* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                     XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc += XXH3_mix16B(input+48, secret+96, seed);
-                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
-                }
-                acc += XXH3_mix16B(input+32, secret+64, seed);
-                acc += XXH3_mix16B(input+len-48, secret+80, seed);
-            }
-            acc += XXH3_mix16B(input+16, secret+32, seed);
-            acc += XXH3_mix16B(input+len-32, secret+48, seed);
-        }
-        acc += XXH3_mix16B(input+0, secret+0, seed);
-        acc += XXH3_mix16B(input+len-16, secret+16, seed);
-
-        return XXH3_avalanche(acc);
-    }
-}
-
-#define XXH3_MIDSIZE_MAX 240
-
-XXH_NO_INLINE XXH64_hash_t
-XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    #define XXH3_MIDSIZE_STARTOFFSET 3
-    #define XXH3_MIDSIZE_LASTOFFSET  17
-
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
-        int const nbRounds = (int)len / 16;
-        int i;
-        for (i=0; i<8; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
-        }
-        acc = XXH3_avalanche(acc);
-        XXH_ASSERT(nbRounds >= 8);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
-         * In everywhere else, it uses scalar code.
-         *
-         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
-         * would still be slower than UMAAL (see XXH_mult64to128).
-         *
-         * Unfortunately, Clang doesn't handle the long multiplies properly and
-         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
-         * scalarized into an ugly mess of VMOV.32 instructions.
-         *
-         * This mess is difficult to avoid without turning autovectorization
-         * off completely, but they are usually relatively minor and/or not
-         * worth it to fix.
-         *
-         * This loop is the easiest to fix, as unlike XXH32, this pragma
-         * _actually works_ because it is a loop vectorization instead of an
-         * SLP vectorization.
-         */
-        #pragma clang loop vectorize(disable)
-#endif
-        for (i=8 ; i < nbRounds; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
-        }
-        /* last bytes */
-        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        return XXH3_avalanche(acc);
-    }
-}
-
-
-/* =======     Long Keys     ======= */
-
-#define XXH_STRIPE_LEN 64
-#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
-#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
-
-#ifdef XXH_OLD_NAMES
-#  define STRIPE_LEN XXH_STRIPE_LEN
-#  define ACC_NB XXH_ACC_NB
-#endif
-
-XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
-{
-    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
-    XXH_memcpy(dst, &v64, sizeof(v64));
-}
-
-/* Several intrinsic functions below are supposed to accept __int64 as argument,
- * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
- * However, several environments do not define __int64 type,
- * requiring a workaround.
- */
-#if !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-    typedef int64_t xxh_i64;
-#else
-    /* the following type must have a width of 64-bit */
-    typedef long long xxh_i64;
-#endif
-
-
-/*
- * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
- *
- * It is a hardened version of UMAC, based off of FARSH's implementation.
- *
- * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
- * implementations, and it is ridiculously fast.
- *
- * We harden it by mixing the original input to the accumulators as well as the product.
- *
- * This means that in the (relatively likely) case of a multiply by zero, the
- * original input is preserved.
- *
- * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
- * cross-pollination, as otherwise the upper and lower halves would be
- * essentially independent.
- *
- * This doesn't matter on 64-bit hashes since they all get merged together in
- * the end, so we skip the extra step.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
- */
-
-#if (XXH_VECTOR == XXH_AVX512) \
-     || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
-
-#ifndef XXH_TARGET_AVX512
-# define XXH_TARGET_AVX512  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
-                     const void* XXH_RESTRICT input,
-                     const void* XXH_RESTRICT secret)
-{
-    __m512i* const xacc = (__m512i *) acc;
-    XXH_ASSERT((((size_t)acc) & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
-
-    {
-        /* data_vec    = input[0]; */
-        __m512i const data_vec    = _mm512_loadu_si512   (input);
-        /* key_vec     = secret[0]; */
-        __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        /* data_key    = data_vec ^ key_vec; */
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
-        /* data_key_lo = data_key >> 32; */
-        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
-        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
-        /* xacc[0] += swap(data_vec); */
-        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
-        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
-        /* xacc[0] += product; */
-        *xacc = _mm512_add_epi64(product, sum);
-    }
-}
-
-/*
- * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
- *
- * Multiplication isn't perfect, as explained by Google in HighwayHash:
- *
- *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
- *  // varying degrees. In descending order of goodness, bytes
- *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
- *  // As expected, the upper and lower bytes are much worse.
- *
- * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
- *
- * Since our algorithm uses a pseudorandom secret to add some variance into the
- * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
- *
- * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
- * extraction.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
- */
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
-    {   __m512i* const xacc = (__m512i*) acc;
-        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
-
-        /* xacc[0] ^= (xacc[0] >> 47) */
-        __m512i const acc_vec     = *xacc;
-        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
-        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
-        /* xacc[0] ^= secret; */
-        __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
-
-        /* xacc[0] *= XXH_PRIME32_1; */
-        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
-        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
-        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
-        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
-    XXH_ASSERT(((size_t)customSecret & 63) == 0);
-    (void)(&XXH_writeLE64);
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
-        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
-
-        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
-              __m512i* const dest = (      __m512i*) customSecret;
-        int i;
-        XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dest & 63) == 0);
-        for (i=0; i < nbRounds; ++i) {
-            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
-             * this will warn "discards 'const' qualifier". */
-            union {
-                const __m512i* cp;
-                void* p;
-            } remote_const_void;
-            remote_const_void.cp = src + i;
-            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
-    }   }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_AVX2) \
-    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
-
-#ifndef XXH_TARGET_AVX2
-# define XXH_TARGET_AVX2  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void
-XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   __m256i* const xacc    =       (__m256i *) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires  a const __m256i * pointer for some reason. */
-        const         __m256i* const xinput  = (const __m256i *) input;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
-        const         __m256i* const xsecret = (const __m256i *) secret;
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
-            /* data_vec    = xinput[i]; */
-            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
-            /* key_vec     = xsecret[i]; */
-            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
-            /* data_key    = data_vec ^ key_vec; */
-            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
-            /* data_key_lo = data_key >> 32; */
-            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
-            /* xacc[i] += swap(data_vec); */
-            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
-            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
-            /* xacc[i] += product; */
-            xacc[i] = _mm256_add_epi64(product, sum);
-    }   }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void
-XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   __m256i* const xacc = (__m256i*) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
-        const         __m256i* const xsecret = (const __m256i *) secret;
-        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47) */
-            __m256i const acc_vec     = xacc[i];
-            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
-            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
-            /* xacc[i] ^= xsecret; */
-            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
-            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
-
-            /* xacc[i] *= XXH_PRIME32_1; */
-            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
-            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
-            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
-    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
-    (void)(&XXH_writeLE64);
-    XXH_PREFETCH(customSecret);
-    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
-
-        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
-              __m256i*       dest = (      __m256i*) customSecret;
-
-#       if defined(__GNUC__) || defined(__clang__)
-        /*
-         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
-         *   - do not extract the secret from sse registers in the internal loop
-         *   - use less common registers, and avoid pushing these reg into stack
-         */
-        XXH_COMPILER_GUARD(dest);
-#       endif
-        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dest & 31) == 0);
-
-        /* GCC -O2 need unroll loop manually */
-        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
-        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
-        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
-        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
-        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
-        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
-    }
-}
-
-#endif
-
-/* x86dispatch always generates SSE2 */
-#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
-
-#ifndef XXH_TARGET_SSE2
-# define XXH_TARGET_SSE2  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void
-XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    /* SSE2 is just a half-scale version of the AVX2 version. */
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {   __m128i* const xacc    =       (__m128i *) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xinput  = (const __m128i *) input;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xsecret = (const __m128i *) secret;
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
-            /* data_vec    = xinput[i]; */
-            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
-            /* key_vec     = xsecret[i]; */
-            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
-            /* data_key    = data_vec ^ key_vec; */
-            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
-            /* data_key_lo = data_key >> 32; */
-            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
-            /* xacc[i] += swap(data_vec); */
-            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
-            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
-            /* xacc[i] += product; */
-            xacc[i] = _mm_add_epi64(product, sum);
-    }   }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void
-XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {   __m128i* const xacc = (__m128i*) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xsecret = (const __m128i *) secret;
-        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47) */
-            __m128i const acc_vec     = xacc[i];
-            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
-            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
-            /* xacc[i] ^= xsecret[i]; */
-            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
-            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
-
-            /* xacc[i] *= XXH_PRIME32_1; */
-            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
-            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
-            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-    (void)(&XXH_writeLE64);
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
-
-#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
-        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
-        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
-        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
-#       else
-        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
-#       endif
-        int i;
-
-        const void* const src16 = XXH3_kSecret;
-        __m128i* dst16 = (__m128i*) customSecret;
-#       if defined(__GNUC__) || defined(__clang__)
-        /*
-         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
-         *   - do not extract the secret from sse registers in the internal loop
-         *   - use less common registers, and avoid pushing these reg into stack
-         */
-        XXH_COMPILER_GUARD(dst16);
-#       endif
-        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dst16 & 15) == 0);
-
-        for (i=0; i < nbRounds; ++i) {
-            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
-    }   }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_NEON)
-
-/* forward declarations for the scalar routines */
-XXH_FORCE_INLINE void
-XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
-                 void const* XXH_RESTRICT secret, size_t lane);
-
-XXH_FORCE_INLINE void
-XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
-                         void const* XXH_RESTRICT secret, size_t lane);
-
-/*!
- * @internal
- * @brief The bulk processing loop for NEON.
- *
- * The NEON code path is actually partially scalar when running on AArch64. This
- * is to optimize the pipelining and can have up to 15% speedup depending on the
- * CPU, and it also mitigates some GCC codegen issues.
- *
- * @see XXH3_NEON_LANES for configuring this and details about this optimization.
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
-    {
-        uint64x2_t* const xacc = (uint64x2_t *) acc;
-        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
-        uint8_t const* const xinput = (const uint8_t *) input;
-        uint8_t const* const xsecret  = (const uint8_t *) secret;
-
-        size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
-        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
-            /* data_vec = xinput[i]; */
-            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
-            /* key_vec  = xsecret[i];  */
-            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
-            uint64x2_t data_key;
-            uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
-            /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
-            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (data_key >> 32);
-             * data_key = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
-
-        }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarRound(acc, input, secret, i);
-        }
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-
-    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
-        uint8_t const* xsecret = (uint8_t const*) secret;
-        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
-
-        size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
-        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
-            /* xacc[i] ^= (xacc[i] >> 47); */
-            uint64x2_t acc_vec  = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
-
-            /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
-
-            /* xacc[i] *= XXH_PRIME32_1 */
-            uint32x2_t data_key_lo, data_key_hi;
-            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
-             * xacc[i] = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            {   /*
-                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
-                 *
-                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
-                 * incorrectly "optimize" this:
-                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
-                 *   shifted = vshll_n_u32(tmp, 32);
-                 * to this:
-                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
-                 *   shifted = vshlq_n_u64(tmp, 32);
-                 *
-                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
-                 * for NEON, and it scalarizes two 64-bit multiplies instead.
-                 *
-                 * vmull_u32 has the same timing as vmul_u32, and it avoids
-                 * this bug completely.
-                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
-                 */
-                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
-                /* xacc[i] = prod_hi << 32; */
-                xacc[i] = vshlq_n_u64(prod_hi, 32);
-                /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
-            }
-        }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarScrambleRound(acc, secret, i);
-        }
-    }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_VSX)
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    /* presumed aligned */
-    unsigned int* const xacc = (unsigned int*) acc;
-    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
-    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
-    xxh_u64x2 const v32 = { 32, 32 };
-    size_t i;
-    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
-        /* data_vec = xinput[i]; */
-        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
-        /* key_vec = xsecret[i]; */
-        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
-        xxh_u64x2 const data_key = data_vec ^ key_vec;
-        /* shuffled = (data_key << 32) | (data_key >> 32); */
-        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
-        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
-        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
-        /* acc_vec = xacc[i]; */
-        xxh_u64x2 acc_vec        = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
-        acc_vec += product;
-
-        /* swap high and low halves */
-#ifdef __s390x__
-        acc_vec += vec_permi(data_vec, data_vec, 2);
-#else
-        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
-#endif
-        /* xacc[i] = acc_vec; */
-        vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-
-    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
-        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
-        /* constants */
-        xxh_u64x2 const v32  = { 32, 32 };
-        xxh_u64x2 const v47 = { 47, 47 };
-        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
-        size_t i;
-        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47); */
-            xxh_u64x2 const acc_vec  = xacc[i];
-            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
-
-            /* xacc[i] ^= xsecret[i]; */
-            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
-            xxh_u64x2 const data_key = data_vec ^ key_vec;
-
-            /* xacc[i] *= XXH_PRIME32_1 */
-            /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF);  */
-            xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);
-            /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32);  */
-            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
-            xacc[i] = prod_odd + (prod_even << v32);
-    }   }
-}
-
-#endif
-
-/* scalar variants - universal */
-
-/*!
- * @internal
- * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
- *
- * This is extracted to its own function because the NEON path uses a combination
- * of NEON and scalar.
- */
-XXH_FORCE_INLINE void
-XXH3_scalarRound(void* XXH_RESTRICT acc,
-                 void const* XXH_RESTRICT input,
-                 void const* XXH_RESTRICT secret,
-                 size_t lane)
-{
-    xxh_u64* xacc = (xxh_u64*) acc;
-    xxh_u8 const* xinput  = (xxh_u8 const*) input;
-    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
-    XXH_ASSERT(lane < XXH_ACC_NB);
-    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
-    {
-        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
-        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
-        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
-    }
-}
-
-/*!
- * @internal
- * @brief Processes a 64 byte block of data using the scalar path.
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
-                     const void* XXH_RESTRICT input,
-                     const void* XXH_RESTRICT secret)
-{
-    size_t i;
-    for (i=0; i < XXH_ACC_NB; i++) {
-        XXH3_scalarRound(acc, input, secret, i);
-    }
-}
-
-/*!
- * @internal
- * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
- *
- * This is extracted to its own function because the NEON path uses a combination
- * of NEON and scalar.
- */
-XXH_FORCE_INLINE void
-XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
-                         void const* XXH_RESTRICT secret,
-                         size_t lane)
-{
-    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    XXH_ASSERT(lane < XXH_ACC_NB);
-    {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
-        xxh_u64 acc64 = xacc[lane];
-        acc64 = XXH_xorshift64(acc64, 47);
-        acc64 ^= key64;
-        acc64 *= XXH_PRIME32_1;
-        xacc[lane] = acc64;
-    }
-}
-
-/*!
- * @internal
- * @brief Scrambles the accumulators after a large chunk has been read
- */
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    size_t i;
-    for (i=0; i < XXH_ACC_NB; i++) {
-        XXH3_scalarScrambleRound(acc, secret, i);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    /*
-     * We need a separate pointer for the hack below,
-     * which requires a non-const pointer.
-     * Any decent compiler will optimize this out otherwise.
-     */
-    const xxh_u8* kSecretPtr = XXH3_kSecret;
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-
-#if defined(__clang__) && defined(__aarch64__)
-    /*
-     * UGLY HACK:
-     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
-     * placed sequentially, in order, at the top of the unrolled loop.
-     *
-     * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR), it fights for bandwidth with
-     * the arithmetic instructions.
-     *
-     *   I   L   S
-     * MOVK
-     * MOVK
-     * MOVK
-     * MOVK
-     * ADD
-     * SUB      STR
-     *          STR
-     * By forcing loads from memory (as the asm line causes Clang to assume
-     * that XXH3_kSecretPtr has been changed), the pipelines are used more
-     * efficiently:
-     *   I   L   S
-     *      LDR
-     *  ADD LDR
-     *  SUB     STR
-     *          STR
-     *
-     * See XXH3_NEON_LANES for details on the pipsline.
-     *
-     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
-     *   without hack: 2654.4 MB/s
-     *   with hack:    3202.9 MB/s
-     */
-    XXH_COMPILER_GUARD(kSecretPtr);
-#endif
-    /*
-     * Note: in debug mode, this overrides the asm optimization
-     * and Clang will emit MOVK chains again.
-     */
-    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
-
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
-        int i;
-        for (i=0; i < nbRounds; i++) {
-            /*
-             * The asm hack causes Clang to assume that kSecretPtr aliases with
-             * customSecret, and on aarch64, this prevented LDP from merging two
-             * loads together for free. Putting the loads together before the stores
-             * properly generates LDP.
-             */
-            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
-            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
-            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
-            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
-    }   }
-}
-
-
-typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
-typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
-typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
-
-
-#if (XXH_VECTOR == XXH_AVX512)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
-#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
-
-#elif (XXH_VECTOR == XXH_AVX2)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
-#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
-
-#elif (XXH_VECTOR == XXH_SSE2)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
-#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
-
-#elif (XXH_VECTOR == XXH_NEON)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_neon
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#elif (XXH_VECTOR == XXH_VSX)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#else /* scalar */
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#endif
-
-
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
-/*
- * XXH3_accumulate()
- * Loops over XXH3_accumulate_512().
- * Assumption: nbStripes will not overflow the secret size
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
-                const xxh_u8* XXH_RESTRICT input,
-                const xxh_u8* XXH_RESTRICT secret,
-                      size_t nbStripes,
-                      XXH3_f_accumulate_512 f_acc512)
-{
-    size_t n;
-    for (n = 0; n < nbStripes; n++ ) {
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
-        f_acc512(acc,
-                 in,
-                 secret + n*XXH_SECRET_CONSUME_RATE);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
-                      const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
-                            XXH3_f_scrambleAcc f_scramble)
-{
-    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
-    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
-    size_t const nb_blocks = (len - 1) / block_len;
-
-    size_t n;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-
-    for (n = 0; n < nb_blocks; n++) {
-        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
-        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
-    }
-
-    /* last partial block */
-    XXH_ASSERT(len > XXH_STRIPE_LEN);
-    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
-        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
-
-        /* last stripe */
-        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
-#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
-            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
-    }   }
-}
-
-XXH_FORCE_INLINE xxh_u64
-XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
-{
-    return XXH3_mul128_fold64(
-               acc[0] ^ XXH_readLE64(secret),
-               acc[1] ^ XXH_readLE64(secret+8) );
-}
-
-static XXH64_hash_t
-XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
-{
-    xxh_u64 result64 = start;
-    size_t i = 0;
-
-    for (i = 0; i < 4; i++) {
-        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
-         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
-         * XXH3_64bits, len == 256, Snapdragon 835:
-         *   without hack: 2063.7 MB/s
-         *   with hack:    2560.7 MB/s
-         */
-        XXH_COMPILER_GUARD(result64);
-#endif
-    }
-
-    return XXH3_avalanche(result64);
-}
-
-#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
-                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
-                           const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate_512 f_acc512,
-                           XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    /* do not align on 8, so that the secret is different from the accumulator */
-#define XXH_SECRET_MERGEACCS_START 11
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
-}
-
-/*
- * It's important for performance to transmit secret's size (when it's static)
- * so that the compiler can properly optimize the vectorized loop.
- * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
- */
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
-                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-/*
- * It's preferable for performance that XXH3_hashLong is not inlined,
- * as it results in a smaller function for small data, easier to the instruction cache.
- * Note that inside this no_inline function, we do inline the internal loop,
- * and provide a statically defined secret size to allow optimization of vector loop.
- */
-XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
-                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-/*
- * XXH3_hashLong_64b_withSeed():
- * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
- * and then use this key for long mode hashing.
- *
- * This operation is decently fast but nonetheless costs a little bit of time.
- * Try to avoid it whenever possible (typically when seed==0).
- *
- * It's important for performance that XXH3_hashLong is not inlined. Not sure
- * why (uop cache maybe?), but the difference is large and easily measurable.
- */
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
-                                    XXH64_hash_t seed,
-                                    XXH3_f_accumulate_512 f_acc512,
-                                    XXH3_f_scrambleAcc f_scramble,
-                                    XXH3_f_initCustomSecret f_initSec)
-{
-    if (seed == 0)
-        return XXH3_hashLong_64b_internal(input, len,
-                                          XXH3_kSecret, sizeof(XXH3_kSecret),
-                                          f_acc512, f_scramble);
-    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-        f_initSec(secret, seed);
-        return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
-                                          f_acc512, f_scramble);
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong is not inlined.
- */
-XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* input, size_t len,
-                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
-{
-    (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
-}
-
-
-typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
-                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
-                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
-                     XXH3_hashLong64_f f_hashLong)
-{
-    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
-    /*
-     * If an action is to be taken if `secretLen` condition is not respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash.
-     * Also, note that function signature doesn't offer room to return an error.
-     */
-    if (len <= 16)
-        return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
-    if (len <= 128)
-        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
-}
-
-
-/* ===   Public entry point   === */
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
-{
-    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
-{
-    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
-}
-
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
-}
-
-
-/* ===   XXH3 streaming   === */
-
-/*
- * Malloc's a pointer that is always aligned to align.
- *
- * This must be freed with `XXH_alignedFree()`.
- *
- * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
- * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
- * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
- *
- * This underalignment previously caused a rather obvious crash which went
- * completely unnoticed due to XXH3_createState() not actually being tested.
- * Credit to RedSpah for noticing this bug.
- *
- * The alignment is done manually: Functions like posix_memalign or _mm_malloc
- * are avoided: To maintain portability, we would have to write a fallback
- * like this anyways, and besides, testing for the existence of library
- * functions without relying on external build tools is impossible.
- *
- * The method is simple: Overallocate, manually align, and store the offset
- * to the original behind the returned pointer.
- *
- * Align must be a power of 2 and 8 <= align <= 128.
- */
-static void* XXH_alignedMalloc(size_t s, size_t align)
-{
-    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
-    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
-    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
-    {   /* Overallocate to make room for manual realignment and an offset byte */
-        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
-        if (base != NULL) {
-            /*
-             * Get the offset needed to align this pointer.
-             *
-             * Even if the returned pointer is aligned, there will always be
-             * at least one byte to store the offset to the original pointer.
-             */
-            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
-            /* Add the offset for the now-aligned pointer */
-            xxh_u8* ptr = base + offset;
-
-            XXH_ASSERT((size_t)ptr % align == 0);
-
-            /* Store the offset immediately before the returned pointer. */
-            ptr[-1] = (xxh_u8)offset;
-            return ptr;
-        }
-        return NULL;
-    }
-}
-/*
- * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
- * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
- */
-static void XXH_alignedFree(void* p)
-{
-    if (p != NULL) {
-        xxh_u8* ptr = (xxh_u8*)p;
-        /* Get the offset byte we added in XXH_malloc. */
-        xxh_u8 offset = ptr[-1];
-        /* Free the original malloc'd pointer */
-        xxh_u8* base = ptr - offset;
-        XXH_free(base);
-    }
-}
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
-{
-    XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
-    if (state==NULL) return NULL;
-    XXH3_INITSTATE(state);
-    return state;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
-{
-    XXH_alignedFree(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
-{
-    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
-}
-
-static void
-XXH3_reset_internal(XXH3_state_t* statePtr,
-                    XXH64_hash_t seed,
-                    const void* secret, size_t secretSize)
-{
-    size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
-    size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
-    XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
-    XXH_ASSERT(statePtr != NULL);
-    /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
-    memset((char*)statePtr + initStart, 0, initLength);
-    statePtr->acc[0] = XXH_PRIME32_3;
-    statePtr->acc[1] = XXH_PRIME64_1;
-    statePtr->acc[2] = XXH_PRIME64_2;
-    statePtr->acc[3] = XXH_PRIME64_3;
-    statePtr->acc[4] = XXH_PRIME64_4;
-    statePtr->acc[5] = XXH_PRIME32_2;
-    statePtr->acc[6] = XXH_PRIME64_5;
-    statePtr->acc[7] = XXH_PRIME32_1;
-    statePtr->seed = seed;
-    statePtr->useSeed = (seed != 0);
-    statePtr->extSecret = (const unsigned char*)secret;
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
-    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    if (seed==0) return XXH3_64bits_reset(statePtr);
-    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
-        XXH3_initCustomSecret(statePtr->customSecret, seed);
-    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
-    statePtr->useSeed = 1; /* always, even if seed64==0 */
-    return XXH_OK;
-}
-
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
-XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
-                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
-                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
-                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-                    XXH3_f_accumulate_512 f_acc512,
-                    XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
-    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-    if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
-        /* need a scrambling operation */
-        size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
-        size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
-        f_scramble(acc, secret + secretLimit);
-        XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
-        *nbStripesSoFarPtr = nbStripesAfterBlock;
-    } else {
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
-        *nbStripesSoFarPtr += nbStripes;
-    }
-}
-
-#ifndef XXH3_STREAM_USE_STACK
-# ifndef __clang__ /* clang doesn't need additional stack space */
-#   define XXH3_STREAM_USE_STACK 1
-# endif
-#endif
-/*
- * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
- */
-XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
-            const xxh_u8* XXH_RESTRICT input, size_t len,
-            XXH3_f_accumulate_512 f_acc512,
-            XXH3_f_scrambleAcc f_scramble)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    XXH_ASSERT(state != NULL);
-    {   const xxh_u8* const bEnd = input + len;
-        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
-        /* For some reason, gcc and MSVC seem to suffer greatly
-         * when operating accumulators directly into state.
-         * Operating into stack space seems to enable proper optimization.
-         * clang, on the other hand, doesn't seem to need this trick */
-        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
-#else
-        xxh_u64* XXH_RESTRICT const acc = state->acc;
-#endif
-        state->totalLen += len;
-        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
-
-        /* small input : just fill in tmp buffer */
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
-            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-            state->bufferedSize += (XXH32_hash_t)len;
-            return XXH_OK;
-        }
-
-        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
-        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
-        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
-
-        /*
-         * Internal buffer is partially filled (always, except at beginning)
-         * Complete it, then consume it.
-         */
-        if (state->bufferedSize) {
-            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
-            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
-            input += loadSize;
-            XXH3_consumeStripes(acc,
-                               &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
-                                secret, state->secretLimit,
-                                f_acc512, f_scramble);
-            state->bufferedSize = 0;
-        }
-        XXH_ASSERT(input < bEnd);
-
-        /* large input to consume : ingest per full block */
-        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
-            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
-            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
-            /* join to current block's end */
-            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
-                XXH_ASSERT(nbStripesToEnd <= nbStripes);
-                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                state->nbStripesSoFar = 0;
-                input += nbStripesToEnd * XXH_STRIPE_LEN;
-                nbStripes -= nbStripesToEnd;
-            }
-            /* consume per entire blocks */
-            while(nbStripes >= state->nbStripesPerBlock) {
-                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
-                nbStripes -= state->nbStripesPerBlock;
-            }
-            /* consume last partial block */
-            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
-            input += nbStripes * XXH_STRIPE_LEN;
-            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
-            state->nbStripesSoFar = nbStripes;
-            /* buffer predecessor of last partial stripe */
-            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
-        } else {
-            /* content to consume <= block size */
-            /* Consume input by a multiple of internal buffer size */
-            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-                do {
-                    XXH3_consumeStripes(acc,
-                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                        input, XXH3_INTERNALBUFFER_STRIPES,
-                                        secret, state->secretLimit,
-                                        f_acc512, f_scramble);
-                    input += XXH3_INTERNALBUFFER_SIZE;
-                } while (input<limit);
-                /* buffer predecessor of last partial stripe */
-                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            }
-        }
-
-        /* Some remaining input (always) : buffer it */
-        XXH_ASSERT(input < bEnd);
-        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
-        XXH_ASSERT(state->bufferedSize == 0);
-        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
-        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
-#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
-        /* save stack accumulators into state */
-        memcpy(state->acc, acc, sizeof(acc));
-#endif
-    }
-
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-
-XXH_FORCE_INLINE void
-XXH3_digest_long (XXH64_hash_t* acc,
-                  const XXH3_state_t* state,
-                  const unsigned char* secret)
-{
-    /*
-     * Digest on a local copy. This way, the state remains unaltered, and it can
-     * continue ingesting more input afterwards.
-     */
-    XXH_memcpy(acc, state->acc, sizeof(state->acc));
-    if (state->bufferedSize >= XXH_STRIPE_LEN) {
-        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
-        size_t nbStripesSoFar = state->nbStripesSoFar;
-        XXH3_consumeStripes(acc,
-                           &nbStripesSoFar, state->nbStripesPerBlock,
-                            state->buffer, nbStripes,
-                            secret, state->secretLimit,
-                            XXH3_accumulate_512, XXH3_scrambleAcc);
-        /* last stripe */
-        XXH3_accumulate_512(acc,
-                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
-    } else {  /* bufferedSize < XXH_STRIPE_LEN */
-        xxh_u8 lastStripe[XXH_STRIPE_LEN];
-        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
-        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
-        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-        XXH3_accumulate_512(acc,
-                            lastStripe,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
-    }
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
-{
-    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-        XXH3_digest_long(acc, state, secret);
-        return XXH3_mergeAccs(acc,
-                              secret + XXH_SECRET_MERGEACCS_START,
-                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
-    }
-    /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
-    if (state->useSeed)
-        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
-                                  secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-
-
-
-/* ==========================================
- * XXH3 128 bits (a.k.a XXH128)
- * ==========================================
- * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
- * even without counting the significantly larger output size.
- *
- * For example, extra steps are taken to avoid the seed-dependent collisions
- * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
- *
- * This strength naturally comes at the cost of some speed, especially on short
- * lengths. Note that longer hashes are about as fast as the 64-bit version
- * due to it using only a slight modification of the 64-bit loop.
- *
- * XXH128 is also more oriented towards 64-bit machines. It is still extremely
- * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
- */
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    /* A doubled version of 1to3_64b with different constants. */
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(secret != NULL);
-    /*
-     * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
-     * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
-     * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
-     */
-    {   xxh_u8 const c1 = input[0];
-        xxh_u8 const c2 = input[len >> 1];
-        xxh_u8 const c3 = input[len - 1];
-        xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
-                                | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
-        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
-        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
-        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
-        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
-        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
-        XXH128_hash_t h128;
-        h128.low64  = XXH64_avalanche(keyed_lo);
-        h128.high64 = XXH64_avalanche(keyed_hi);
-        return h128;
-    }
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
-    {   xxh_u32 const input_lo = XXH_readLE32(input);
-        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
-        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
-        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
-        xxh_u64 const keyed = input_64 ^ bitflip;
-
-        /* Shift len to the left to ensure it is even, this avoids even multiplies. */
-        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
-
-        m128.high64 += (m128.low64 << 1);
-        m128.low64  ^= (m128.high64 >> 3);
-
-        m128.low64   = XXH_xorshift64(m128.low64, 35);
-        m128.low64  *= 0x9FB21C651E98DF25ULL;
-        m128.low64   = XXH_xorshift64(m128.low64, 28);
-        m128.high64  = XXH3_avalanche(m128.high64);
-        return m128;
-    }
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
-        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
-        xxh_u64 const input_lo = XXH_readLE64(input);
-        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
-        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
-        /*
-         * Put len in the middle of m128 to ensure that the length gets mixed to
-         * both the low and high bits in the 128x64 multiply below.
-         */
-        m128.low64 += (xxh_u64)(len - 1) << 54;
-        input_hi   ^= bitfliph;
-        /*
-         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
-         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
-         * the high 64 bits of m128.
-         *
-         * The best approach to this operation is different on 32-bit and 64-bit.
-         */
-        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
-            /*
-             * 32-bit optimized version, which is more readable.
-             *
-             * On 32-bit, it removes an ADC and delays a dependency between the two
-             * halves of m128.high64, but it generates an extra mask on 64-bit.
-             */
-            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
-        } else {
-            /*
-             * 64-bit optimized (albeit more confusing) version.
-             *
-             * Uses some properties of addition and multiplication to remove the mask:
-             *
-             * Let:
-             *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
-             *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
-             *    c = XXH_PRIME32_2
-             *
-             *    a + (b * c)
-             * Inverse Property: x + y - x == y
-             *    a + (b * (1 + c - 1))
-             * Distributive Property: x * (y + z) == (x * y) + (x * z)
-             *    a + (b * 1) + (b * (c - 1))
-             * Identity Property: x * 1 == x
-             *    a + b + (b * (c - 1))
-             *
-             * Substitute a, b, and c:
-             *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
-             *
-             * Since input_hi.hi + input_hi.lo == input_hi, we get this:
-             *    input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
-             */
-            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
-        }
-        /* m128 ^= XXH_swap64(m128 >> 64); */
-        m128.low64  ^= XXH_swap64(m128.high64);
-
-        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
-            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
-            h128.high64 += m128.high64 * XXH_PRIME64_2;
-
-            h128.low64   = XXH3_avalanche(h128.low64);
-            h128.high64  = XXH3_avalanche(h128.high64);
-            return h128;
-    }   }
-}
-
-/*
- * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
- */
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
-        if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
-        if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
-        {   XXH128_hash_t h128;
-            xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
-            xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
-            h128.low64 = XXH64_avalanche(seed ^ bitflipl);
-            h128.high64 = XXH64_avalanche( seed ^ bitfliph);
-            return h128;
-    }   }
-}
-
-/*
- * A bit slower than XXH3_mix16B, but handles multiply by zero better.
- */
-XXH_FORCE_INLINE XXH128_hash_t
-XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
-              const xxh_u8* secret, XXH64_hash_t seed)
-{
-    acc.low64  += XXH3_mix16B (input_1, secret+0, seed);
-    acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
-    acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
-    acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
-    return acc;
-}
-
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   XXH128_hash_t acc;
-        acc.low64 = len * XXH_PRIME64_1;
-        acc.high64 = 0;
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
-                }
-                acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
-            }
-            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
-        }
-        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
-        {   XXH128_hash_t h128;
-            h128.low64  = acc.low64 + acc.high64;
-            h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                        + (acc.high64   * XXH_PRIME64_4)
-                        + ((len - seed) * XXH_PRIME64_2);
-            h128.low64  = XXH3_avalanche(h128.low64);
-            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-            return h128;
-        }
-    }
-}
-
-XXH_NO_INLINE XXH128_hash_t
-XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                       XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    {   XXH128_hash_t acc;
-        int const nbRounds = (int)len / 32;
-        int i;
-        acc.low64 = len * XXH_PRIME64_1;
-        acc.high64 = 0;
-        for (i=0; i<4; i++) {
-            acc = XXH128_mix32B(acc,
-                                input  + (32 * i),
-                                input  + (32 * i) + 16,
-                                secret + (32 * i),
-                                seed);
-        }
-        acc.low64 = XXH3_avalanche(acc.low64);
-        acc.high64 = XXH3_avalanche(acc.high64);
-        XXH_ASSERT(nbRounds >= 4);
-        for (i=4 ; i < nbRounds; i++) {
-            acc = XXH128_mix32B(acc,
-                                input + (32 * i),
-                                input + (32 * i) + 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
-                                seed);
-        }
-        /* last bytes */
-        acc = XXH128_mix32B(acc,
-                            input + len - 16,
-                            input + len - 32,
-                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            0ULL - seed);
-
-        {   XXH128_hash_t h128;
-            h128.low64  = acc.low64 + acc.high64;
-            h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                        + (acc.high64   * XXH_PRIME64_4)
-                        + ((len - seed) * XXH_PRIME64_2);
-            h128.low64  = XXH3_avalanche(h128.low64);
-            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-            return h128;
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
-                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
-                            XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    {   XXH128_hash_t h128;
-        h128.low64  = XXH3_mergeAccs(acc,
-                                     secret + XXH_SECRET_MERGEACCS_START,
-                                     (xxh_u64)len * XXH_PRIME64_1);
-        h128.high64 = XXH3_mergeAccs(acc,
-                                     secret + secretSize
-                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
-                                     ~((xxh_u64)len * XXH_PRIME64_2));
-        return h128;
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong is not inlined.
- */
-XXH_NO_INLINE XXH128_hash_t
-XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
-                           XXH64_hash_t seed64,
-                           const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-/*
- * It's important for performance to pass @secretLen (when it's static)
- * to the compiler, so that it can properly optimize the vectorized loop.
- */
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
-                              XXH64_hash_t seed64,
-                              const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64;
-    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
-                                XXH64_hash_t seed64,
-                                XXH3_f_accumulate_512 f_acc512,
-                                XXH3_f_scrambleAcc f_scramble,
-                                XXH3_f_initCustomSecret f_initSec)
-{
-    if (seed64 == 0)
-        return XXH3_hashLong_128b_internal(input, len,
-                                           XXH3_kSecret, sizeof(XXH3_kSecret),
-                                           f_acc512, f_scramble);
-    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-        f_initSec(secret, seed64);
-        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
-                                           f_acc512, f_scramble);
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong is not inlined.
- */
-XXH_NO_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSeed(const void* input, size_t len,
-                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)secret; (void)secretLen;
-    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
-}
-
-typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
-                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_128bits_internal(const void* input, size_t len,
-                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
-                      XXH3_hashLong128_f f_hl128)
-{
-    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
-    /*
-     * If an action is to be taken if `secret` conditions are not respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash.
-     */
-    if (len <= 16)
-        return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
-    if (len <= 128)
-        return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    return f_hl128(input, len, seed64, secret, secretLen);
-}
-
-
-/* ===   Public XXH128 API   === */
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
-{
-    return XXH3_128bits_internal(input, len, 0,
-                                 XXH3_kSecret, sizeof(XXH3_kSecret),
-                                 XXH3_hashLong_128b_default);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
-{
-    return XXH3_128bits_internal(input, len, 0,
-                                 (const xxh_u8*)secret, secretSize,
-                                 XXH3_hashLong_128b_withSecret);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_128bits_internal(input, len, seed,
-                                 XXH3_kSecret, sizeof(XXH3_kSecret),
-                                 XXH3_hashLong_128b_withSeed);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* input, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_128bits_withSeed(input, len, seed);
-}
-
-
-/* ===   XXH3 128-bit streaming   === */
-
-/*
- * All initialization and update functions are identical to 64-bit streaming variant.
- * The only difference is the finalization routine.
- */
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
-{
-    return XXH3_64bits_reset(statePtr);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    return XXH3_64bits_reset_withSeed(statePtr, seed);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
-{
-    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-        XXH3_digest_long(acc, state, secret);
-        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-        {   XXH128_hash_t h128;
-            h128.low64  = XXH3_mergeAccs(acc,
-                                         secret + XXH_SECRET_MERGEACCS_START,
-                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
-            h128.high64 = XXH3_mergeAccs(acc,
-                                         secret + state->secretLimit + XXH_STRIPE_LEN
-                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
-                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
-            return h128;
-        }
-    }
-    /* len <= XXH3_MIDSIZE_MAX : short code */
-    if (state->seed)
-        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
-                                   secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-
-/* 128-bit utility functions */
-
-#include <string.h>   /* memcmp, memcpy */
-
-/* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
-{
-    /* note : XXH128_hash_t is compact, it has no padding byte */
-    return !(memcmp(&h1, &h2, sizeof(h1)));
-}
-
-/* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1  > *h128_2
- *          <0 if *h128_1  < *h128_2
- *          =0 if *h128_1 == *h128_2  */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
-{
-    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
-    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
-    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
-    /* note : bets that, in most cases, hash values are different */
-    if (hcmp) return hcmp;
-    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
-}
-
-
-/*======   Canonical representation   ======*/
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) {
-        hash.high64 = XXH_swap64(hash.high64);
-        hash.low64  = XXH_swap64(hash.low64);
-    }
-    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
-    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
-{
-    XXH128_hash_t h;
-    h.high64 = XXH_readBE64(src);
-    h.low64  = XXH_readBE64(src->digest + 8);
-    return h;
-}
-
-
-
-/* ==========================================
- * Secret generators
- * ==========================================
- */
-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
-{
-    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
-    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
-{
-#if (XXH_DEBUGLEVEL >= 1)
-    XXH_ASSERT(secretBuffer != NULL);
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-#else
-    /* production mode, assert() are disabled */
-    if (secretBuffer == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-#endif
-
-    if (customSeedSize == 0) {
-        customSeed = XXH3_kSecret;
-        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
-    }
-#if (XXH_DEBUGLEVEL >= 1)
-    XXH_ASSERT(customSeed != NULL);
-#else
-    if (customSeed == NULL) return XXH_ERROR;
-#endif
-
-    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
-    {   size_t pos = 0;
-        while (pos < secretSize) {
-            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
-            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
-            pos += toCopy;
-    }   }
-
-    {   size_t const nbSeg16 = secretSize / 16;
-        size_t n;
-        XXH128_canonical_t scrambler;
-        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-        for (n=0; n<nbSeg16; n++) {
-            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
-            XXH3_combine16((char*)secretBuffer + n*16, h128);
-        }
-        /* last segment */
-        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
-    }
-    return XXH_OK;
-}
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
-{
-    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-    XXH3_initCustomSecret(secret, seed);
-    XXH_ASSERT(secretBuffer != NULL);
-    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
-}
-
-
-
-/* Pop our optimization override from above */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
-#  pragma GCC pop_options
-#endif
-
-#endif  /* XXH_NO_LONG_LONG */
-
-#endif  /* XXH_NO_XXH3 */
-
-/*!
- * @}
- */
-#endif  /* XXH_IMPLEMENTATION */
-
-
-#if defined (__cplusplus)
-}
-#endif
diff --git a/dep/zstd/lib/common/zstd_common.c b/dep/zstd/lib/common/zstd_common.c
deleted file mode 100644
index 3d7e35b30..000000000
--- a/dep/zstd/lib/common/zstd_common.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
-#include "error_private.h"
-#include "zstd_internal.h"
-
-
-/*-****************************************
-*  Version
-******************************************/
-unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
-
-const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
-
-
-/*-****************************************
-*  ZSTD Error Management
-******************************************/
-#undef ZSTD_isError   /* defined within zstd_internal.h */
-/*! ZSTD_isError() :
- *  tells if a return value is an error code
- *  symbol is required for external callers */
-unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-
-/*! ZSTD_getErrorName() :
- *  provides error code string from function result (useful for debugging) */
-const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
-
-/*! ZSTD_getError() :
- *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
-ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
-
-/*! ZSTD_getErrorString() :
- *  provides error code string from enum */
-const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
-
-
-
-/*=**************************************************************
-*  Custom allocator
-****************************************************************/
-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
-{
-    if (customMem.customAlloc)
-        return customMem.customAlloc(customMem.opaque, size);
-    return ZSTD_malloc(size);
-}
-
-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
-{
-    if (customMem.customAlloc) {
-        /* calloc implemented as malloc+memset;
-         * not as efficient as calloc, but next best guess for custom malloc */
-        void* const ptr = customMem.customAlloc(customMem.opaque, size);
-        ZSTD_memset(ptr, 0, size);
-        return ptr;
-    }
-    return ZSTD_calloc(1, size);
-}
-
-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
-{
-    if (ptr!=NULL) {
-        if (customMem.customFree)
-            customMem.customFree(customMem.opaque, ptr);
-        else
-            ZSTD_free(ptr);
-    }
-}
diff --git a/dep/zstd/lib/common/zstd_deps.h b/dep/zstd/lib/common/zstd_deps.h
deleted file mode 100644
index 14211344a..000000000
--- a/dep/zstd/lib/common/zstd_deps.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* This file provides common libc dependencies that zstd requires.
- * The purpose is to allow replacing this file with a custom implementation
- * to compile zstd without libc support.
- */
-
-/* Need:
- * NULL
- * INT_MAX
- * UINT_MAX
- * ZSTD_memcpy()
- * ZSTD_memset()
- * ZSTD_memmove()
- */
-#ifndef ZSTD_DEPS_COMMON
-#define ZSTD_DEPS_COMMON
-
-#include <limits.h>
-#include <stddef.h>
-#include <string.h>
-
-#if defined(__GNUC__) && __GNUC__ >= 4
-# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
-# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
-# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
-#else
-# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
-# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
-# define ZSTD_memset(p,v,l) memset((p),(v),(l))
-#endif
-
-#endif /* ZSTD_DEPS_COMMON */
-
-/* Need:
- * ZSTD_malloc()
- * ZSTD_free()
- * ZSTD_calloc()
- */
-#ifdef ZSTD_DEPS_NEED_MALLOC
-#ifndef ZSTD_DEPS_MALLOC
-#define ZSTD_DEPS_MALLOC
-
-#include <stdlib.h>
-
-#define ZSTD_malloc(s) malloc(s)
-#define ZSTD_calloc(n,s) calloc((n), (s))
-#define ZSTD_free(p) free((p))
-
-#endif /* ZSTD_DEPS_MALLOC */
-#endif /* ZSTD_DEPS_NEED_MALLOC */
-
-/*
- * Provides 64-bit math support.
- * Need:
- * U64 ZSTD_div64(U64 dividend, U32 divisor)
- */
-#ifdef ZSTD_DEPS_NEED_MATH64
-#ifndef ZSTD_DEPS_MATH64
-#define ZSTD_DEPS_MATH64
-
-#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
-
-#endif /* ZSTD_DEPS_MATH64 */
-#endif /* ZSTD_DEPS_NEED_MATH64 */
-
-/* Need:
- * assert()
- */
-#ifdef ZSTD_DEPS_NEED_ASSERT
-#ifndef ZSTD_DEPS_ASSERT
-#define ZSTD_DEPS_ASSERT
-
-#include <assert.h>
-
-#endif /* ZSTD_DEPS_ASSERT */
-#endif /* ZSTD_DEPS_NEED_ASSERT */
-
-/* Need:
- * ZSTD_DEBUG_PRINT()
- */
-#ifdef ZSTD_DEPS_NEED_IO
-#ifndef ZSTD_DEPS_IO
-#define ZSTD_DEPS_IO
-
-#include <stdio.h>
-#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
-
-#endif /* ZSTD_DEPS_IO */
-#endif /* ZSTD_DEPS_NEED_IO */
-
-/* Only requested when <stdint.h> is known to be present.
- * Need:
- * intptr_t
- */
-#ifdef ZSTD_DEPS_NEED_STDINT
-#ifndef ZSTD_DEPS_STDINT
-#define ZSTD_DEPS_STDINT
-
-#include <stdint.h>
-
-#endif /* ZSTD_DEPS_STDINT */
-#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/dep/zstd/lib/common/zstd_internal.h b/dep/zstd/lib/common/zstd_internal.h
deleted file mode 100644
index e4d36ce09..000000000
--- a/dep/zstd/lib/common/zstd_internal.h
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-/* this module contains definitions which must be identical
- * across compression, decompression and dictBuilder.
- * It also contains a few functions useful to at least 2 of them
- * and which benefit from being inlined */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "compiler.h"
-#include "cpu.h"
-#include "mem.h"
-#include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
-#include "error_private.h"
-#define ZSTD_STATIC_LINKING_ONLY
-#include "../zstd.h"
-#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#ifndef XXH_STATIC_LINKING_ONLY
-#  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
-#endif
-#include "xxhash.h"                /* XXH_reset, update, digest */
-#ifndef ZSTD_NO_TRACE
-#  include "zstd_trace.h"
-#else
-#  define ZSTD_TRACE 0
-#endif
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* ---- static assert (debug) --- */
-#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
-#define ZSTD_isError ERR_isError   /* for inlining */
-#define FSE_isError  ERR_isError
-#define HUF_isError  ERR_isError
-
-
-/*-*************************************
-*  shared macros
-***************************************/
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
-#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
-
-
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM    (1<<12)
-
-#define ZSTD_REP_NUM      3                 /* number of repcodes */
-static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define BIT7 128
-#define BIT6  64
-#define BIT5  32
-#define BIT4  16
-#define BIT1   2
-#define BIT0   1
-
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
-static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
-
-#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
-
-#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
-
-#define ZSTD_FRAMECHECKSUMSIZE 4
-
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
-
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
-
-#define LONGNBSEQ 0x7F00
-
-#define MINMATCH 3
-
-#define Litbits  8
-#define MaxLit ((1<<Litbits) - 1)
-#define MaxML   52
-#define MaxLL   35
-#define DefaultMaxOff 28
-#define MaxOff  31
-#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
-#define MLFSELog    9
-#define LLFSELog    9
-#define OffFSELog   8
-#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
-
-#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
-/* Each table cannot take more than #symbols * FSELog bits */
-#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
-
-static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
-     0, 0, 0, 0, 0, 0, 0, 0,
-     0, 0, 0, 0, 0, 0, 0, 0,
-     1, 1, 1, 1, 2, 2, 3, 3,
-     4, 6, 7, 8, 9,10,11,12,
-    13,14,15,16
-};
-static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
-     4, 3, 2, 2, 2, 2, 2, 2,
-     2, 2, 2, 2, 2, 1, 1, 1,
-     2, 2, 2, 2, 2, 2, 2, 2,
-     2, 3, 2, 1, 1, 1, 1, 1,
-    -1,-1,-1,-1
-};
-#define LL_DEFAULTNORMLOG 6  /* for static allocation */
-static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
-
-static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
-     0, 0, 0, 0, 0, 0, 0, 0,
-     0, 0, 0, 0, 0, 0, 0, 0,
-     0, 0, 0, 0, 0, 0, 0, 0,
-     0, 0, 0, 0, 0, 0, 0, 0,
-     1, 1, 1, 1, 2, 2, 3, 3,
-     4, 4, 5, 7, 8, 9,10,11,
-    12,13,14,15,16
-};
-static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
-     1, 4, 3, 2, 2, 2, 2, 2,
-     2, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1,-1,-1,
-    -1,-1,-1,-1,-1
-};
-#define ML_DEFAULTNORMLOG 6  /* for static allocation */
-static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-
-static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
-     1, 1, 1, 1, 1, 1, 2, 2,
-     2, 1, 1, 1, 1, 1, 1, 1,
-     1, 1, 1, 1, 1, 1, 1, 1,
-    -1,-1,-1,-1,-1
-};
-#define OF_DEFAULTNORMLOG 5  /* for static allocation */
-static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
-
-
-/*-*******************************************
-*  Shared functions to include for inlining
-*********************************************/
-static void ZSTD_copy8(void* dst, const void* src) {
-#if defined(ZSTD_ARCH_ARM_NEON)
-    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
-#else
-    ZSTD_memcpy(dst, src, 8);
-#endif
-}
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-
-/* Need to use memmove here since the literal buffer can now be located within
-   the dst buffer. In circumstances where the op "catches up" to where the
-   literal buffer is, there can be partial overlaps in this call on the final
-   copy if the literal is being shifted by less than 16 bytes. */
-static void ZSTD_copy16(void* dst, const void* src) {
-#if defined(ZSTD_ARCH_ARM_NEON)
-    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
-#elif defined(ZSTD_ARCH_X86_SSE2)
-    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
-#elif defined(__clang__)
-    ZSTD_memmove(dst, src, 16);
-#else
-    /* ZSTD_memmove is not inlined properly by gcc */
-    BYTE copy16_buf[16];
-    ZSTD_memcpy(copy16_buf, src, 16);
-    ZSTD_memcpy(dst, copy16_buf, 16);
-#endif
-}
-#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
-
-#define WILDCOPY_OVERLENGTH 32
-#define WILDCOPY_VECLEN 16
-
-typedef enum {
-    ZSTD_no_overlap,
-    ZSTD_overlap_src_before_dst
-    /*  ZSTD_overlap_dst_before_src, */
-} ZSTD_overlap_e;
-
-/*! ZSTD_wildcopy() :
- *  Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
- *  @param ovtype controls the overlap detection
- *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
- *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
- *           The src buffer must be before the dst buffer.
- */
-MEM_STATIC FORCE_INLINE_ATTR
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
-{
-    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + length;
-
-    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
-        /* Handle short offset copies. */
-        do {
-            COPY8(op, ip)
-        } while (op < oend);
-    } else {
-        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
-        /* Separate out the first COPY16() call because the copy length is
-         * almost certain to be short, so the branches have different
-         * probabilities. Since it is almost certain to be short, only do
-         * one COPY16() in the first call. Then, do two calls per loop since
-         * at that point it is more likely to have a high trip count.
-         */
-#ifdef __aarch64__
-        do {
-            COPY16(op, ip);
-        }
-        while (op < oend);
-#else
-        ZSTD_copy16(op, ip);
-        if (16 >= length) return;
-        op += 16;
-        ip += 16;
-        do {
-            COPY16(op, ip);
-            COPY16(op, ip);
-        }
-        while (op < oend);
-#endif
-    }
-}
-
-MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    size_t const length = MIN(dstCapacity, srcSize);
-    if (length > 0) {
-        ZSTD_memcpy(dst, src, length);
-    }
-    return length;
-}
-
-/* define "workspace is too large" as this number of times larger than needed */
-#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
-
-/* when workspace is continuously too large
- * during at least this number of times,
- * context's memory usage is considered wasteful,
- * because it's sized to handle a worst case scenario which rarely happens.
- * In which case, resize it down to free some memory */
-#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
-
-/* Controls whether the input/output buffer is buffered or stable. */
-typedef enum {
-    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
-    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
-} ZSTD_bufferMode_e;
-
-
-/*-*******************************************
-*  Private declarations
-*********************************************/
-typedef struct seqDef_s {
-    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
-    U16 litLength;
-    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
-} seqDef;
-
-/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
-typedef enum {
-    ZSTD_llt_none = 0,             /* no longLengthType */
-    ZSTD_llt_literalLength = 1,    /* represents a long literal */
-    ZSTD_llt_matchLength = 2       /* represents a long match */
-} ZSTD_longLengthType_e;
-
-typedef struct {
-    seqDef* sequencesStart;
-    seqDef* sequences;      /* ptr to end of sequences */
-    BYTE* litStart;
-    BYTE* lit;              /* ptr to end of literals */
-    BYTE* llCode;
-    BYTE* mlCode;
-    BYTE* ofCode;
-    size_t maxNbSeq;
-    size_t maxNbLit;
-
-    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
-     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
-     * the existing value of the litLength or matchLength by 0x10000.
-     */
-    ZSTD_longLengthType_e   longLengthType;
-    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
-} seqStore_t;
-
-typedef struct {
-    U32 litLength;
-    U32 matchLength;
-} ZSTD_sequenceLength;
-
-/**
- * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
- */
-MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
-{
-    ZSTD_sequenceLength seqLen;
-    seqLen.litLength = seq->litLength;
-    seqLen.matchLength = seq->mlBase + MINMATCH;
-    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
-            seqLen.litLength += 0xFFFF;
-        }
-        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
-            seqLen.matchLength += 0xFFFF;
-        }
-    }
-    return seqLen;
-}
-
-/**
- * Contains the compressed frame size and an upper-bound for the decompressed frame size.
- * Note: before using `compressedSize`, check for errors using ZSTD_isError().
- *       similarly, before using `decompressedBound`, check for errors using:
- *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
- */
-typedef struct {
-    size_t compressedSize;
-    unsigned long long decompressedBound;
-} ZSTD_frameSizeInfo;   /* decompress & legacy */
-
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
-
-/* custom memory allocation functions */
-void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
-void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
-
-
-MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
-{
-    assert(val != 0);
-    {
-#   if defined(_MSC_VER)   /* Visual */
-#       if STATIC_BMI2 == 1
-            return _lzcnt_u32(val)^31;
-#       else
-            if (val != 0) {
-                unsigned long r;
-                _BitScanReverse(&r, val);
-                return (unsigned)r;
-            } else {
-                /* Should not reach this code path */
-                __assume(0);
-            }
-#       endif
-#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return __builtin_clz (val) ^ 31;
-#   elif defined(__ICCARM__)    /* IAR Intrinsic */
-        return 31 - __CLZ(val);
-#   else   /* Software version */
-        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
-        U32 v = val;
-        v |= v >> 1;
-        v |= v >> 2;
-        v |= v >> 4;
-        v |= v >> 8;
-        v |= v >> 16;
-        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
-#   endif
-    }
-}
-
-/**
- * Counts the number of trailing zeros of a `size_t`.
- * Most compilers should support CTZ as a builtin. A backup
- * implementation is provided if the builtin isn't supported, but
- * it may not be terribly efficient.
- */
-MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
-{
-    if (MEM_64bits()) {
-#       if defined(_MSC_VER) && defined(_WIN64)
-#           if STATIC_BMI2
-                return _tzcnt_u64(val);
-#           else
-                if (val != 0) {
-                    unsigned long r;
-                    _BitScanForward64(&r, (U64)val);
-                    return (unsigned)r;
-                } else {
-                    /* Should not reach this code path */
-                    __assume(0);
-                }
-#           endif
-#       elif defined(__GNUC__) && (__GNUC__ >= 4)
-            return __builtin_ctzll((U64)val);
-#       else
-            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
-                                                      4, 25, 14, 28,  9, 34, 20, 56,
-                                                      5, 17, 26, 54, 15, 41, 29, 43,
-                                                      10, 31, 38, 35, 21, 45, 49, 57,
-                                                      63,  6, 12, 18, 24, 27, 33, 55,
-                                                      16, 53, 40, 42, 30, 37, 44, 48,
-                                                      62, 11, 23, 32, 52, 39, 36, 47,
-                                                      61, 22, 51, 46, 60, 50, 59, 58 };
-            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-#       endif
-    } else { /* 32 bits */
-#       if defined(_MSC_VER)
-            if (val != 0) {
-                unsigned long r;
-                _BitScanForward(&r, (U32)val);
-                return (unsigned)r;
-            } else {
-                /* Should not reach this code path */
-                __assume(0);
-            }
-#       elif defined(__GNUC__) && (__GNUC__ >= 3)
-            return __builtin_ctz((U32)val);
-#       else
-            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
-                                                     30, 22, 20, 15, 25, 17,  4,  8,
-                                                     31, 27, 13, 23, 21, 19, 16,  7,
-                                                     26, 12, 18,  6, 11,  5, 10,  9 };
-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-#       endif
-    }
-}
-
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
-
-
-typedef struct {
-    blockType_e blockType;
-    U32 lastBlock;
-    U32 origSize;
-} blockProperties_t;   /* declared here for decompress and fullbench */
-
-/*! ZSTD_getcBlockSize() :
- *  Provides the size of compressed block from block header `src` */
-/* Used by: decompress, fullbench (does not get its definition from here) */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
-                          blockProperties_t* bpPtr);
-
-/*! ZSTD_decodeSeqHeaders() :
- *  decode sequence header from src */
-/* Used by: decompress, fullbench (does not get its definition from here) */
-size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
-                       const void* src, size_t srcSize);
-
-/**
- * @returns true iff the CPU supports dynamic BMI2 dispatch.
- */
-MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
-{
-    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
-    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/dep/zstd/lib/common/zstd_trace.h b/dep/zstd/lib/common/zstd_trace.h
deleted file mode 100644
index f9121f7d8..000000000
--- a/dep/zstd/lib/common/zstd_trace.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_TRACE_H
-#define ZSTD_TRACE_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include <stddef.h>
-
-/* weak symbol support
- * For now, enable conservatively:
- * - Only GNUC
- * - Only ELF
- * - Only x86-64 and i386
- * Also, explicitly disable on platforms known not to work so they aren't
- * forgotten in the future.
- */
-#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
-    defined(__GNUC__) && defined(__ELF__) && \
-    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
-    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
-    !defined(__CYGWIN__) && !defined(_AIX)
-#  define ZSTD_HAVE_WEAK_SYMBOLS 1
-#else
-#  define ZSTD_HAVE_WEAK_SYMBOLS 0
-#endif
-#if ZSTD_HAVE_WEAK_SYMBOLS
-#  define ZSTD_WEAK_ATTR __attribute__((__weak__))
-#else
-#  define ZSTD_WEAK_ATTR
-#endif
-
-/* Only enable tracing when weak symbols are available. */
-#ifndef ZSTD_TRACE
-#  define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
-#endif
-
-#if ZSTD_TRACE
-
-struct ZSTD_CCtx_s;
-struct ZSTD_DCtx_s;
-struct ZSTD_CCtx_params_s;
-
-typedef struct {
-    /**
-     * ZSTD_VERSION_NUMBER
-     *
-     * This is guaranteed to be the first member of ZSTD_trace.
-     * Otherwise, this struct is not stable between versions. If
-     * the version number does not match your expectation, you
-     * should not interpret the rest of the struct.
-     */
-    unsigned version;
-    /**
-     * Non-zero if streaming (de)compression is used.
-     */
-    unsigned streaming;
-    /**
-     * The dictionary ID.
-     */
-    unsigned dictionaryID;
-    /**
-     * Is the dictionary cold?
-     * Only set on decompression.
-     */
-    unsigned dictionaryIsCold;
-    /**
-     * The dictionary size or zero if no dictionary.
-     */
-    size_t dictionarySize;
-    /**
-     * The uncompressed size of the data.
-     */
-    size_t uncompressedSize;
-    /**
-     * The compressed size of the data.
-     */
-    size_t compressedSize;
-    /**
-     * The fully resolved CCtx parameters (NULL on decompression).
-     */
-    struct ZSTD_CCtx_params_s const* params;
-    /**
-     * The ZSTD_CCtx pointer (NULL on decompression).
-     */
-    struct ZSTD_CCtx_s const* cctx;
-    /**
-     * The ZSTD_DCtx pointer (NULL on compression).
-     */
-    struct ZSTD_DCtx_s const* dctx;
-} ZSTD_Trace;
-
-/**
- * A tracing context. It must be 0 when tracing is disabled.
- * Otherwise, any non-zero value returned by a tracing begin()
- * function is presented to any subsequent calls to end().
- *
- * Any non-zero value is treated as tracing is enabled and not
- * interpreted by the library.
- *
- * Two possible uses are:
- * * A timestamp for when the begin() function was called.
- * * A unique key identifying the (de)compression, like the
- *   address of the [dc]ctx pointer if you need to track
- *   more information than just a timestamp.
- */
-typedef unsigned long long ZSTD_TraceCtx;
-
-/**
- * Trace the beginning of a compression call.
- * @param cctx The dctx pointer for the compression.
- *             It can be used as a key to map begin() to end().
- * @returns Non-zero if tracing is enabled. The return value is
- *          passed to ZSTD_trace_compress_end().
- */
-ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
-    struct ZSTD_CCtx_s const* cctx);
-
-/**
- * Trace the end of a compression call.
- * @param ctx The return value of ZSTD_trace_compress_begin().
- * @param trace The zstd tracing info.
- */
-ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
-    ZSTD_TraceCtx ctx,
-    ZSTD_Trace const* trace);
-
-/**
- * Trace the beginning of a decompression call.
- * @param dctx The dctx pointer for the decompression.
- *             It can be used as a key to map begin() to end().
- * @returns Non-zero if tracing is enabled. The return value is
- *          passed to ZSTD_trace_compress_end().
- */
-ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
-    struct ZSTD_DCtx_s const* dctx);
-
-/**
- * Trace the end of a decompression call.
- * @param ctx The return value of ZSTD_trace_decompress_begin().
- * @param trace The zstd tracing info.
- */
-ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
-    ZSTD_TraceCtx ctx,
-    ZSTD_Trace const* trace);
-
-#endif /* ZSTD_TRACE */
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_TRACE_H */
diff --git a/dep/zstd/lib/compress/clevels.h b/dep/zstd/lib/compress/clevels.h
deleted file mode 100644
index 7ed2e0049..000000000
--- a/dep/zstd/lib/compress/clevels.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_CLEVELS_H
-#define ZSTD_CLEVELS_H
-
-#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressionParameters  */
-#include "../zstd.h"
-
-/*-=====  Pre-defined compression levels  =====-*/
-
-#define ZSTD_MAX_CLEVEL     22
-
-#ifdef __GNUC__
-__attribute__((__unused__))
-#endif
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
-{   /* "default" - for any srcSize > 256 KB */
-    /* W,  C,  H,  S,  L, TL, strat */
-    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
-    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
-    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
-    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
-    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
-    { 21, 18, 19,  3,  5,  2, ZSTD_greedy  },  /* level  5 */
-    { 21, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6 */
-    { 21, 19, 20,  4,  5,  8, ZSTD_lazy    },  /* level  7 */
-    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  8 */
-    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
-    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 10 */
-    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 11 */
-    { 22, 22, 23,  6,  5, 32, ZSTD_lazy2   },  /* level 12 */
-    { 22, 22, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
-    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
-    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
-    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
-    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
-    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
-    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
-    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
-    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
-    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
-},
-{   /* for srcSize <= 256 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
-    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
-    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
-    { 18, 16, 17,  3,  5,  2, ZSTD_greedy  },  /* level  4.*/
-    { 18, 17, 18,  5,  5,  2, ZSTD_greedy  },  /* level  5.*/
-    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
-    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
-    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
-    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
-    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
-    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
-    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
-    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
-    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
-    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
-    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
-    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
-    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
-    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-{   /* for srcSize <= 128 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
-    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
-    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
-    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
-    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
-    { 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
-    { 17, 16, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
-    { 17, 16, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
-    { 17, 16, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
-    { 17, 16, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
-    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
-    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
-    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
-    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
-    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
-    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
-    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
-    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
-    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-{   /* for srcSize <= 16 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
-    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
-    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
-    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
-    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
-    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
-    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
-    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
-    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
-    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
-    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
-    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
-    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
-    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
-    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
-    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
-    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
-    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
-    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-};
-
-
-
-#endif  /* ZSTD_CLEVELS_H */
diff --git a/dep/zstd/lib/compress/fse_compress.c b/dep/zstd/lib/compress/fse_compress.c
deleted file mode 100644
index 5547b4ac0..000000000
--- a/dep/zstd/lib/compress/fse_compress.c
+++ /dev/null
@@ -1,741 +0,0 @@
-/* ******************************************************************
- * FSE : Finite State Entropy encoder
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "../common/compiler.h"
-#include "../common/mem.h"        /* U32, U16, etc. */
-#include "../common/debug.h"      /* assert, DEBUGLOG */
-#include "hist.h"       /* HIST_count_wksp */
-#include "../common/bitstream.h"
-#define FSE_STATIC_LINKING_ONLY
-#include "../common/fse.h"
-#include "../common/error_private.h"
-#define ZSTD_DEPS_NEED_MALLOC
-#define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
-
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_isError ERR_isError
-
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#  error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#  error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X,Y) X##Y
-#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
-#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
-
-
-/* Function templates */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
- * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
- */
-size_t FSE_buildCTable_wksp(FSE_CTable* ct,
-                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
-                            void* workSpace, size_t wkspSize)
-{
-    U32 const tableSize = 1 << tableLog;
-    U32 const tableMask = tableSize - 1;
-    void* const ptr = ct;
-    U16* const tableU16 = ( (U16*) ptr) + 2;
-    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
-    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
-    U32 const step = FSE_TABLESTEP(tableSize);
-    U32 const maxSV1 = maxSymbolValue+1;
-
-    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
-    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
-
-    U32 highThreshold = tableSize-1;
-
-    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
-    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
-    /* CTable header */
-    tableU16[-2] = (U16) tableLog;
-    tableU16[-1] = (U16) maxSymbolValue;
-    assert(tableLog < 16);   /* required for threshold strategy to work */
-
-    /* For explanations on how to distribute symbol values over the table :
-     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-
-     #ifdef __clang_analyzer__
-     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
-     #endif
-
-    /* symbol start positions */
-    {   U32 u;
-        cumul[0] = 0;
-        for (u=1; u <= maxSV1; u++) {
-            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
-                cumul[u] = cumul[u-1] + 1;
-                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
-            } else {
-                assert(normalizedCounter[u-1] >= 0);
-                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
-                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
-        }   }
-        cumul[maxSV1] = (U16)(tableSize+1);
-    }
-
-    /* Spread symbols */
-    if (highThreshold == tableSize - 1) {
-        /* Case for no low prob count symbols. Lay down 8 bytes at a time
-         * to reduce branch misses since we are operating on a small block
-         */
-        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
-        {   U64 const add = 0x0101010101010101ull;
-            size_t pos = 0;
-            U64 sv = 0;
-            U32 s;
-            for (s=0; s<maxSV1; ++s, sv += add) {
-                int i;
-                int const n = normalizedCounter[s];
-                MEM_write64(spread + pos, sv);
-                for (i = 8; i < n; i += 8) {
-                    MEM_write64(spread + pos + i, sv);
-                }
-                assert(n>=0);
-                pos += (size_t)n;
-            }
-        }
-        /* Spread symbols across the table. Lack of lowprob symbols means that
-         * we don't need variable sized inner loop, so we can unroll the loop and
-         * reduce branch misses.
-         */
-        {   size_t position = 0;
-            size_t s;
-            size_t const unroll = 2; /* Experimentally determined optimal unroll */
-            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
-            for (s = 0; s < (size_t)tableSize; s += unroll) {
-                size_t u;
-                for (u = 0; u < unroll; ++u) {
-                    size_t const uPosition = (position + (u * step)) & tableMask;
-                    tableSymbol[uPosition] = spread[s + u];
-                }
-                position = (position + (unroll * step)) & tableMask;
-            }
-            assert(position == 0);   /* Must have initialized all positions */
-        }
-    } else {
-        U32 position = 0;
-        U32 symbol;
-        for (symbol=0; symbol<maxSV1; symbol++) {
-            int nbOccurrences;
-            int const freq = normalizedCounter[symbol];
-            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
-                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-                position = (position + step) & tableMask;
-                while (position > highThreshold)
-                    position = (position + step) & tableMask;   /* Low proba area */
-        }   }
-        assert(position==0);  /* Must have initialized all positions */
-    }
-
-    /* Build table */
-    {   U32 u; for (u=0; u<tableSize; u++) {
-        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
-        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
-    }   }
-
-    /* Build Symbol Transformation Table */
-    {   unsigned total = 0;
-        unsigned s;
-        for (s=0; s<=maxSymbolValue; s++) {
-            switch (normalizedCounter[s])
-            {
-            case  0:
-                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
-                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
-                break;
-
-            case -1:
-            case  1:
-                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
-                assert(total <= INT_MAX);
-                symbolTT[s].deltaFindState = (int)(total - 1);
-                total ++;
-                break;
-            default :
-                assert(normalizedCounter[s] > 1);
-                {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
-                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
-                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
-                    total +=  (unsigned)normalizedCounter[s];
-    }   }   }   }
-
-#if 0  /* debug : symbol costs */
-    DEBUGLOG(5, "\n --- table statistics : ");
-    {   U32 symbol;
-        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
-            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
-                symbol, normalizedCounter[symbol],
-                FSE_getMaxNbBits(symbolTT, symbol),
-                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
-    }   }
-#endif
-
-    return 0;
-}
-
-
-
-#ifndef FSE_COMMONDEFS_ONLY
-
-/*-**************************************************************
-*  FSE NCount encoding
-****************************************************************/
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-{
-    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
-                                   + 4 /* bitCount initialized at 4 */
-                                   + 2 /* first two symbols may use one additional bit each */) / 8)
-                                    + 1 /* round up to whole nb bytes */
-                                    + 2 /* additional two bytes for bitstream flush */;
-    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
-}
-
-static size_t
-FSE_writeNCount_generic (void* header, size_t headerBufferSize,
-                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
-                         unsigned writeIsSafe)
-{
-    BYTE* const ostart = (BYTE*) header;
-    BYTE* out = ostart;
-    BYTE* const oend = ostart + headerBufferSize;
-    int nbBits;
-    const int tableSize = 1 << tableLog;
-    int remaining;
-    int threshold;
-    U32 bitStream = 0;
-    int bitCount = 0;
-    unsigned symbol = 0;
-    unsigned const alphabetSize = maxSymbolValue + 1;
-    int previousIs0 = 0;
-
-    /* Table Size */
-    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
-    bitCount  += 4;
-
-    /* Init */
-    remaining = tableSize+1;   /* +1 for extra accuracy */
-    threshold = tableSize;
-    nbBits = tableLog+1;
-
-    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
-        if (previousIs0) {
-            unsigned start = symbol;
-            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
-            if (symbol == alphabetSize) break;   /* incorrect distribution */
-            while (symbol >= start+24) {
-                start+=24;
-                bitStream += 0xFFFFU << bitCount;
-                if ((!writeIsSafe) && (out > oend-2))
-                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
-                out[0] = (BYTE) bitStream;
-                out[1] = (BYTE)(bitStream>>8);
-                out+=2;
-                bitStream>>=16;
-            }
-            while (symbol >= start+3) {
-                start+=3;
-                bitStream += 3 << bitCount;
-                bitCount += 2;
-            }
-            bitStream += (symbol-start) << bitCount;
-            bitCount += 2;
-            if (bitCount>16) {
-                if ((!writeIsSafe) && (out > oend - 2))
-                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
-                out[0] = (BYTE)bitStream;
-                out[1] = (BYTE)(bitStream>>8);
-                out += 2;
-                bitStream >>= 16;
-                bitCount -= 16;
-        }   }
-        {   int count = normalizedCounter[symbol++];
-            int const max = (2*threshold-1) - remaining;
-            remaining -= count < 0 ? -count : count;
-            count++;   /* +1 for extra accuracy */
-            if (count>=threshold)
-                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-            bitStream += count << bitCount;
-            bitCount  += nbBits;
-            bitCount  -= (count<max);
-            previousIs0  = (count==1);
-            if (remaining<1) return ERROR(GENERIC);
-            while (remaining<threshold) { nbBits--; threshold>>=1; }
-        }
-        if (bitCount>16) {
-            if ((!writeIsSafe) && (out > oend - 2))
-                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
-            out[0] = (BYTE)bitStream;
-            out[1] = (BYTE)(bitStream>>8);
-            out += 2;
-            bitStream >>= 16;
-            bitCount -= 16;
-    }   }
-
-    if (remaining != 1)
-        return ERROR(GENERIC);  /* incorrect normalized distribution */
-    assert(symbol <= alphabetSize);
-
-    /* flush remaining bitStream */
-    if ((!writeIsSafe) && (out > oend - 2))
-        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
-    out[0] = (BYTE)bitStream;
-    out[1] = (BYTE)(bitStream>>8);
-    out+= (bitCount+7) /8;
-
-    return (out-ostart);
-}
-
-
-size_t FSE_writeNCount (void* buffer, size_t bufferSize,
-                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
-    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
-
-    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
-        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
-
-    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
-}
-
-
-/*-**************************************************************
-*  FSE Compression Code
-****************************************************************/
-
-FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
-{
-    size_t size;
-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
-    return (FSE_CTable*)ZSTD_malloc(size);
-}
-
-void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
-
-/* provides the minimum logSize to safely represent a distribution */
-static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
-{
-    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
-    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-    assert(srcSize > 1); /* Not supported, RLE should be used instead */
-    return minBits;
-}
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
-{
-    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
-    U32 tableLog = maxTableLog;
-    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
-    assert(srcSize > 1); /* Not supported, RLE should be used instead */
-    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
-    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
-    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
-    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
-    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
-    return tableLog;
-}
-
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
-}
-
-/* Secondary normalization method.
-   To be used when primary method fails. */
-
-static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
-{
-    short const NOT_YET_ASSIGNED = -2;
-    U32 s;
-    U32 distributed = 0;
-    U32 ToDistribute;
-
-    /* Init */
-    U32 const lowThreshold = (U32)(total >> tableLog);
-    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
-
-    for (s=0; s<=maxSymbolValue; s++) {
-        if (count[s] == 0) {
-            norm[s]=0;
-            continue;
-        }
-        if (count[s] <= lowThreshold) {
-            norm[s] = lowProbCount;
-            distributed++;
-            total -= count[s];
-            continue;
-        }
-        if (count[s] <= lowOne) {
-            norm[s] = 1;
-            distributed++;
-            total -= count[s];
-            continue;
-        }
-
-        norm[s]=NOT_YET_ASSIGNED;
-    }
-    ToDistribute = (1 << tableLog) - distributed;
-
-    if (ToDistribute == 0)
-        return 0;
-
-    if ((total / ToDistribute) > lowOne) {
-        /* risk of rounding to zero */
-        lowOne = (U32)((total * 3) / (ToDistribute * 2));
-        for (s=0; s<=maxSymbolValue; s++) {
-            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
-                norm[s] = 1;
-                distributed++;
-                total -= count[s];
-                continue;
-        }   }
-        ToDistribute = (1 << tableLog) - distributed;
-    }
-
-    if (distributed == maxSymbolValue+1) {
-        /* all values are pretty poor;
-           probably incompressible data (should have already been detected);
-           find max, then give all remaining points to max */
-        U32 maxV = 0, maxC = 0;
-        for (s=0; s<=maxSymbolValue; s++)
-            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
-        norm[maxV] += (short)ToDistribute;
-        return 0;
-    }
-
-    if (total == 0) {
-        /* all of the symbols were low enough for the lowOne or lowThreshold */
-        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
-            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
-        return 0;
-    }
-
-    {   U64 const vStepLog = 62 - tableLog;
-        U64 const mid = (1ULL << (vStepLog-1)) - 1;
-        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
-        U64 tmpTotal = mid;
-        for (s=0; s<=maxSymbolValue; s++) {
-            if (norm[s]==NOT_YET_ASSIGNED) {
-                U64 const end = tmpTotal + (count[s] * rStep);
-                U32 const sStart = (U32)(tmpTotal >> vStepLog);
-                U32 const sEnd = (U32)(end >> vStepLog);
-                U32 const weight = sEnd - sStart;
-                if (weight < 1)
-                    return ERROR(GENERIC);
-                norm[s] = (short)weight;
-                tmpTotal = end;
-    }   }   }
-
-    return 0;
-}
-
-size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
-                           const unsigned* count, size_t total,
-                           unsigned maxSymbolValue, unsigned useLowProbCount)
-{
-    /* Sanity checks */
-    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
-    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
-    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
-
-    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
-        short const lowProbCount = useLowProbCount ? -1 : 1;
-        U64 const scale = 62 - tableLog;
-        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
-        U64 const vStep = 1ULL<<(scale-20);
-        int stillToDistribute = 1<<tableLog;
-        unsigned s;
-        unsigned largest=0;
-        short largestP=0;
-        U32 lowThreshold = (U32)(total >> tableLog);
-
-        for (s=0; s<=maxSymbolValue; s++) {
-            if (count[s] == total) return 0;   /* rle special case */
-            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
-            if (count[s] <= lowThreshold) {
-                normalizedCounter[s] = lowProbCount;
-                stillToDistribute--;
-            } else {
-                short proba = (short)((count[s]*step) >> scale);
-                if (proba<8) {
-                    U64 restToBeat = vStep * rtbTable[proba];
-                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
-                }
-                if (proba > largestP) { largestP=proba; largest=s; }
-                normalizedCounter[s] = proba;
-                stillToDistribute -= proba;
-        }   }
-        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
-            /* corner case, need another normalization method */
-            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
-            if (FSE_isError(errorCode)) return errorCode;
-        }
-        else normalizedCounter[largest] += (short)stillToDistribute;
-    }
-
-#if 0
-    {   /* Print Table (debug) */
-        U32 s;
-        U32 nTotal = 0;
-        for (s=0; s<=maxSymbolValue; s++)
-            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
-        for (s=0; s<=maxSymbolValue; s++)
-            nTotal += abs(normalizedCounter[s]);
-        if (nTotal != (1U<<tableLog))
-            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
-        getchar();
-    }
-#endif
-
-    return tableLog;
-}
-
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
-{
-    const unsigned tableSize = 1 << nbBits;
-    const unsigned tableMask = tableSize - 1;
-    const unsigned maxSymbolValue = tableMask;
-    void* const ptr = ct;
-    U16* const tableU16 = ( (U16*) ptr) + 2;
-    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
-    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
-    unsigned s;
-
-    /* Sanity checks */
-    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
-
-    /* header */
-    tableU16[-2] = (U16) nbBits;
-    tableU16[-1] = (U16) maxSymbolValue;
-
-    /* Build table */
-    for (s=0; s<tableSize; s++)
-        tableU16[s] = (U16)(tableSize + s);
-
-    /* Build Symbol Transformation Table */
-    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-        for (s=0; s<=maxSymbolValue; s++) {
-            symbolTT[s].deltaNbBits = deltaNbBits;
-            symbolTT[s].deltaFindState = s-1;
-    }   }
-
-    return 0;
-}
-
-/* fake FSE_CTable, for rle input (always same symbol) */
-size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
-{
-    void* ptr = ct;
-    U16* tableU16 = ( (U16*) ptr) + 2;
-    void* FSCTptr = (U32*)ptr + 2;
-    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
-
-    /* header */
-    tableU16[-2] = (U16) 0;
-    tableU16[-1] = (U16) symbolValue;
-
-    /* Build table */
-    tableU16[0] = 0;
-    tableU16[1] = 0;   /* just in case */
-
-    /* Build Symbol Transformation Table */
-    symbolTT[symbolValue].deltaNbBits = 0;
-    symbolTT[symbolValue].deltaFindState = 0;
-
-    return 0;
-}
-
-
-static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
-                           const void* src, size_t srcSize,
-                           const FSE_CTable* ct, const unsigned fast)
-{
-    const BYTE* const istart = (const BYTE*) src;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* ip=iend;
-
-    BIT_CStream_t bitC;
-    FSE_CState_t CState1, CState2;
-
-    /* init */
-    if (srcSize <= 2) return 0;
-    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
-      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
-
-#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
-
-    if (srcSize & 1) {
-        FSE_initCState2(&CState1, ct, *--ip);
-        FSE_initCState2(&CState2, ct, *--ip);
-        FSE_encodeSymbol(&bitC, &CState1, *--ip);
-        FSE_FLUSHBITS(&bitC);
-    } else {
-        FSE_initCState2(&CState2, ct, *--ip);
-        FSE_initCState2(&CState1, ct, *--ip);
-    }
-
-    /* join to mod 4 */
-    srcSize -= 2;
-    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
-        FSE_encodeSymbol(&bitC, &CState2, *--ip);
-        FSE_encodeSymbol(&bitC, &CState1, *--ip);
-        FSE_FLUSHBITS(&bitC);
-    }
-
-    /* 2 or 4 encoding per loop */
-    while ( ip>istart ) {
-
-        FSE_encodeSymbol(&bitC, &CState2, *--ip);
-
-        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
-            FSE_FLUSHBITS(&bitC);
-
-        FSE_encodeSymbol(&bitC, &CState1, *--ip);
-
-        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
-            FSE_encodeSymbol(&bitC, &CState2, *--ip);
-            FSE_encodeSymbol(&bitC, &CState1, *--ip);
-        }
-
-        FSE_FLUSHBITS(&bitC);
-    }
-
-    FSE_flushCState(&bitC, &CState2);
-    FSE_flushCState(&bitC, &CState1);
-    return BIT_closeCStream(&bitC);
-}
-
-size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
-                           const void* src, size_t srcSize,
-                           const FSE_CTable* ct)
-{
-    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
-
-    if (fast)
-        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
-    else
-        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
-}
-
-
-size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-/* FSE_compress_wksp() :
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` size must be `(1<<tableLog)`.
- */
-size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
-{
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const oend = ostart + dstSize;
-
-    unsigned count[FSE_MAX_SYMBOL_VALUE+1];
-    S16   norm[FSE_MAX_SYMBOL_VALUE+1];
-    FSE_CTable* CTable = (FSE_CTable*)workSpace;
-    size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
-    void* scratchBuffer = (void*)(CTable + CTableSize);
-    size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
-
-    /* init conditions */
-    if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
-    if (srcSize <= 1) return 0;  /* Not compressible */
-    if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-    if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
-
-    /* Scan input and build symbol stats */
-    {   CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
-        if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
-        if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
-        if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
-    }
-
-    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
-
-    /* Write table description header */
-    {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
-        op += nc_err;
-    }
-
-    /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
-        if (cSize == 0) return 0;   /* not enough space for compressed data */
-        op += cSize;
-    }
-
-    /* check compressibility */
-    if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
-
-    return op-ostart;
-}
-
-typedef struct {
-    FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-    union {
-      U32 hist_wksp[HIST_WKSP_SIZE_U32];
-      BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
-    } workspace;
-} fseWkspMax_t;
-
-size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
-{
-    fseWkspMax_t scratchBuffer;
-    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
-    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
-    return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
-}
-
-size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
-}
-#endif
-
-#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/dep/zstd/lib/compress/hist.c b/dep/zstd/lib/compress/hist.c
deleted file mode 100644
index 073c57e75..000000000
--- a/dep/zstd/lib/compress/hist.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/* ******************************************************************
- * hist : Histogram functions
- * part of Finite State Entropy project
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* --- dependencies --- */
-#include "../common/mem.h"             /* U32, BYTE, etc. */
-#include "../common/debug.h"           /* assert, DEBUGLOG */
-#include "../common/error_private.h"   /* ERROR */
-#include "hist.h"
-
-
-/* --- Error management --- */
-unsigned HIST_isError(size_t code) { return ERR_isError(code); }
-
-/*-**************************************************************
- *  Histogram functions
- ****************************************************************/
-unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
-                           const void* src, size_t srcSize)
-{
-    const BYTE* ip = (const BYTE*)src;
-    const BYTE* const end = ip + srcSize;
-    unsigned maxSymbolValue = *maxSymbolValuePtr;
-    unsigned largestCount=0;
-
-    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
-    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
-
-    while (ip<end) {
-        assert(*ip <= maxSymbolValue);
-        count[*ip++]++;
-    }
-
-    while (!count[maxSymbolValue]) maxSymbolValue--;
-    *maxSymbolValuePtr = maxSymbolValue;
-
-    {   U32 s;
-        for (s=0; s<=maxSymbolValue; s++)
-            if (count[s] > largestCount) largestCount = count[s];
-    }
-
-    return largestCount;
-}
-
-typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
-
-/* HIST_count_parallel_wksp() :
- * store histogram into 4 intermediate tables, recombined at the end.
- * this design makes better use of OoO cpus,
- * and is noticeably faster when some values are heavily repeated.
- * But it needs some additional workspace for intermediate tables.
- * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
- * @return : largest histogram frequency,
- *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
-static size_t HIST_count_parallel_wksp(
-                                unsigned* count, unsigned* maxSymbolValuePtr,
-                                const void* source, size_t sourceSize,
-                                HIST_checkInput_e check,
-                                U32* const workSpace)
-{
-    const BYTE* ip = (const BYTE*)source;
-    const BYTE* const iend = ip+sourceSize;
-    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
-    unsigned max=0;
-    U32* const Counting1 = workSpace;
-    U32* const Counting2 = Counting1 + 256;
-    U32* const Counting3 = Counting2 + 256;
-    U32* const Counting4 = Counting3 + 256;
-
-    /* safety checks */
-    assert(*maxSymbolValuePtr <= 255);
-    if (!sourceSize) {
-        ZSTD_memset(count, 0, countSize);
-        *maxSymbolValuePtr = 0;
-        return 0;
-    }
-    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
-
-    /* by stripes of 16 bytes */
-    {   U32 cached = MEM_read32(ip); ip += 4;
-        while (ip < iend-15) {
-            U32 c = cached; cached = MEM_read32(ip); ip += 4;
-            Counting1[(BYTE) c     ]++;
-            Counting2[(BYTE)(c>>8) ]++;
-            Counting3[(BYTE)(c>>16)]++;
-            Counting4[       c>>24 ]++;
-            c = cached; cached = MEM_read32(ip); ip += 4;
-            Counting1[(BYTE) c     ]++;
-            Counting2[(BYTE)(c>>8) ]++;
-            Counting3[(BYTE)(c>>16)]++;
-            Counting4[       c>>24 ]++;
-            c = cached; cached = MEM_read32(ip); ip += 4;
-            Counting1[(BYTE) c     ]++;
-            Counting2[(BYTE)(c>>8) ]++;
-            Counting3[(BYTE)(c>>16)]++;
-            Counting4[       c>>24 ]++;
-            c = cached; cached = MEM_read32(ip); ip += 4;
-            Counting1[(BYTE) c     ]++;
-            Counting2[(BYTE)(c>>8) ]++;
-            Counting3[(BYTE)(c>>16)]++;
-            Counting4[       c>>24 ]++;
-        }
-        ip-=4;
-    }
-
-    /* finish last symbols */
-    while (ip<iend) Counting1[*ip++]++;
-
-    {   U32 s;
-        for (s=0; s<256; s++) {
-            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-            if (Counting1[s] > max) max = Counting1[s];
-    }   }
-
-    {   unsigned maxSymbolValue = 255;
-        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
-        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
-        *maxSymbolValuePtr = maxSymbolValue;
-        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
-    }
-    return (size_t)max;
-}
-
-/* HIST_countFast_wksp() :
- * Same as HIST_countFast(), but using an externally provided scratch buffer.
- * `workSpace` is a writable buffer which must be 4-bytes aligned,
- * `workSpaceSize` must be >= HIST_WKSP_SIZE
- */
-size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
-                          const void* source, size_t sourceSize,
-                          void* workSpace, size_t workSpaceSize)
-{
-    if (sourceSize < 1500) /* heuristic threshold */
-        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
-    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
-    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
-}
-
-/* HIST_count_wksp() :
- * Same as HIST_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
-size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
-                       const void* source, size_t sourceSize,
-                       void* workSpace, size_t workSpaceSize)
-{
-    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
-    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
-    if (*maxSymbolValuePtr < 255)
-        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
-    *maxSymbolValuePtr = 255;
-    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
-}
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
-size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
-                     const void* source, size_t sourceSize)
-{
-    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
-    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
-}
-
-size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
-                 const void* src, size_t srcSize)
-{
-    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
-    return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
-}
-#endif
diff --git a/dep/zstd/lib/compress/hist.h b/dep/zstd/lib/compress/hist.h
deleted file mode 100644
index 228ed48a7..000000000
--- a/dep/zstd/lib/compress/hist.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* ******************************************************************
- * hist : Histogram functions
- * part of Finite State Entropy project
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* --- dependencies --- */
-#include "../common/zstd_deps.h"   /* size_t */
-
-
-/* --- simple histogram functions --- */
-
-/*! HIST_count():
- *  Provides the precise count of each byte within a table 'count'.
- * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
- *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
- * @return : count of the most frequent symbol (which isn't identified).
- *           or an error code, which can be tested using HIST_isError().
- *           note : if return == srcSize, there is only one symbol.
- */
-size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
-                  const void* src, size_t srcSize);
-
-unsigned HIST_isError(size_t code);  /**< tells if a return value is an error code */
-
-
-/* --- advanced histogram functions --- */
-
-#define HIST_WKSP_SIZE_U32 1024
-#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
-/** HIST_count_wksp() :
- *  Same as HIST_count(), but using an externally provided scratch buffer.
- *  Benefit is this function will use very little stack space.
- * `workSpace` is a writable buffer which must be 4-bytes aligned,
- * `workSpaceSize` must be >= HIST_WKSP_SIZE
- */
-size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
-                       const void* src, size_t srcSize,
-                       void* workSpace, size_t workSpaceSize);
-
-/** HIST_countFast() :
- *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
- *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
- */
-size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
-                      const void* src, size_t srcSize);
-
-/** HIST_countFast_wksp() :
- *  Same as HIST_countFast(), but using an externally provided scratch buffer.
- * `workSpace` is a writable buffer which must be 4-bytes aligned,
- * `workSpaceSize` must be >= HIST_WKSP_SIZE
- */
-size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
-                           const void* src, size_t srcSize,
-                           void* workSpace, size_t workSpaceSize);
-
-/*! HIST_count_simple() :
- *  Same as HIST_countFast(), this function is unsafe,
- *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
- *  It is also a bit slower for large inputs.
- *  However, it does not need any additional memory (not even on stack).
- * @return : count of the most frequent symbol.
- *  Note this function doesn't produce any error (i.e. it must succeed).
- */
-unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
-                           const void* src, size_t srcSize);
diff --git a/dep/zstd/lib/compress/huf_compress.c b/dep/zstd/lib/compress/huf_compress.c
deleted file mode 100644
index 2b3d6adc2..000000000
--- a/dep/zstd/lib/compress/huf_compress.c
+++ /dev/null
@@ -1,1370 +0,0 @@
-/* ******************************************************************
- * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#endif
-
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
-#include "../common/compiler.h"
-#include "../common/bitstream.h"
-#include "hist.h"
-#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
-#include "../common/fse.h"        /* header compression */
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "../common/error_private.h"
-
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_isError ERR_isError
-#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
-
-
-/* **************************************************************
-*  Utils
-****************************************************************/
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-}
-
-
-/* *******************************************************
-*  HUF : Huffman block compression
-*********************************************************/
-#define HUF_WORKSPACE_MAX_ALIGNMENT 8
-
-static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
-{
-    size_t const mask = align - 1;
-    size_t const rem = (size_t)workspace & mask;
-    size_t const add = (align - rem) & mask;
-    BYTE* const aligned = (BYTE*)workspace + add;
-    assert((align & (align - 1)) == 0); /* pow 2 */
-    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
-    if (*workspaceSizePtr >= add) {
-        assert(add < align);
-        assert(((size_t)aligned & mask) == 0);
-        *workspaceSizePtr -= add;
-        return aligned;
-    } else {
-        *workspaceSizePtr = 0;
-        return NULL;
-    }
-}
-
-
-/* HUF_compressWeights() :
- * Same as FSE_compress(), but dedicated to huff0's weights compression.
- * The use case needs much less stack memory.
- * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
- */
-#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-
-typedef struct {
-    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
-    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
-} HUF_CompressWeightsWksp;
-
-static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
-{
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const oend = ostart + dstSize;
-
-    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
-    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
-
-    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
-
-    /* init conditions */
-    if (wtSize <= 1) return 0;  /* Not compressible */
-
-    /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
-        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
-        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
-    }
-
-    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
-
-    /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
-        op += hSize;
-    }
-
-    /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
-        if (cSize == 0) return 0;   /* not enough space for compressed data */
-        op += cSize;
-    }
-
-    return (size_t)(op-ostart);
-}
-
-static size_t HUF_getNbBits(HUF_CElt elt)
-{
-    return elt & 0xFF;
-}
-
-static size_t HUF_getNbBitsFast(HUF_CElt elt)
-{
-    return elt;
-}
-
-static size_t HUF_getValue(HUF_CElt elt)
-{
-    return elt & ~0xFF;
-}
-
-static size_t HUF_getValueFast(HUF_CElt elt)
-{
-    return elt;
-}
-
-static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
-{
-    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
-    *elt = nbBits;
-}
-
-static void HUF_setValue(HUF_CElt* elt, size_t value)
-{
-    size_t const nbBits = HUF_getNbBits(*elt);
-    if (nbBits > 0) {
-        assert((value >> nbBits) == 0);
-        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
-    }
-}
-
-typedef struct {
-    HUF_CompressWeightsWksp wksp;
-    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
-} HUF_WriteCTableWksp;
-
-size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
-                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
-                            void* workspace, size_t workspaceSize)
-{
-    HUF_CElt const* const ct = CTable + 1;
-    BYTE* op = (BYTE*)dst;
-    U32 n;
-    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
-
-    /* check conditions */
-    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
-
-    /* convert to weight */
-    wksp->bitsToWeight[0] = 0;
-    for (n=1; n<huffLog+1; n++)
-        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
-    for (n=0; n<maxSymbolValue; n++)
-        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
-
-    /* attempt weights compression by FSE */
-    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
-    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
-        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
-            op[0] = (BYTE)hSize;
-            return hSize+1;
-    }   }
-
-    /* write raw values as 4-bits (max : 15) */
-    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
-    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
-    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
-    for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
-    return ((maxSymbolValue+1)/2) + 1;
-}
-
-/*! HUF_writeCTable() :
-    `CTable` : Huffman tree to save, using huf representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
-    HUF_WriteCTableWksp wksp;
-    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
-}
-
-
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
-{
-    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
-    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
-    U32 tableLog = 0;
-    U32 nbSymbols = 0;
-    HUF_CElt* const ct = CTable + 1;
-
-    /* get symbol weights */
-    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
-    *hasZeroWeights = (rankVal[0] > 0);
-
-    /* check result */
-    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
-
-    CTable[0] = tableLog;
-
-    /* Prepare base value per rank */
-    {   U32 n, nextRankStart = 0;
-        for (n=1; n<=tableLog; n++) {
-            U32 curr = nextRankStart;
-            nextRankStart += (rankVal[n] << (n-1));
-            rankVal[n] = curr;
-    }   }
-
-    /* fill nbBits */
-    {   U32 n; for (n=0; n<nbSymbols; n++) {
-            const U32 w = huffWeight[n];
-            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
-    }   }
-
-    /* fill val */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
-        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
-        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
-        /* determine stating value per rank */
-        valPerRank[tableLog+1] = 0;   /* for w==0 */
-        {   U16 min = 0;
-            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
-                valPerRank[n] = min;     /* get starting value within each rank */
-                min += nbPerRank[n];
-                min >>= 1;
-        }   }
-        /* assign value within rank, symbol order */
-        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
-    }
-
-    *maxSymbolValuePtr = nbSymbols - 1;
-    return readSize;
-}
-
-U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
-{
-    const HUF_CElt* ct = CTable + 1;
-    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-    return (U32)HUF_getNbBits(ct[symbolValue]);
-}
-
-
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
-/**
- * HUF_setMaxHeight():
- * Enforces maxNbBits on the Huffman tree described in huffNode.
- *
- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
- * the tree to so that it is a valid canonical Huffman tree.
- *
- * @pre               The sum of the ranks of each symbol == 2^largestBits,
- *                    where largestBits == huffNode[lastNonNull].nbBits.
- * @post              The sum of the ranks of each symbol == 2^largestBits,
- *                    where largestBits is the return value <= maxNbBits.
- *
- * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
- * @param lastNonNull The symbol with the lowest count in the Huffman tree.
- * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
- *                    may not respect. After this function the Huffman tree will
- *                    respect maxNbBits.
- * @return            The maximum number of bits of the Huffman tree after adjustment,
- *                    necessarily no more than maxNbBits.
- */
-static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
-{
-    const U32 largestBits = huffNode[lastNonNull].nbBits;
-    /* early exit : no elt > maxNbBits, so the tree is already valid. */
-    if (largestBits <= maxNbBits) return largestBits;
-
-    /* there are several too large elements (at least >= 2) */
-    {   int totalCost = 0;
-        const U32 baseCost = 1 << (largestBits - maxNbBits);
-        int n = (int)lastNonNull;
-
-        /* Adjust any ranks > maxNbBits to maxNbBits.
-         * Compute totalCost, which is how far the sum of the ranks is
-         * we are over 2^largestBits after adjust the offending ranks.
-         */
-        while (huffNode[n].nbBits > maxNbBits) {
-            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-            huffNode[n].nbBits = (BYTE)maxNbBits;
-            n--;
-        }
-        /* n stops at huffNode[n].nbBits <= maxNbBits */
-        assert(huffNode[n].nbBits <= maxNbBits);
-        /* n end at index of smallest symbol using < maxNbBits */
-        while (huffNode[n].nbBits == maxNbBits) --n;
-
-        /* renorm totalCost from 2^largestBits to 2^maxNbBits
-         * note : totalCost is necessarily a multiple of baseCost */
-        assert((totalCost & (baseCost - 1)) == 0);
-        totalCost >>= (largestBits - maxNbBits);
-        assert(totalCost > 0);
-
-        /* repay normalized cost */
-        {   U32 const noSymbol = 0xF0F0F0F0;
-            U32 rankLast[HUF_TABLELOG_MAX+2];
-
-            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
-            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
-            {   U32 currentNbBits = maxNbBits;
-                int pos;
-                for (pos=n ; pos >= 0; pos--) {
-                    if (huffNode[pos].nbBits >= currentNbBits) continue;
-                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
-                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
-            }   }
-
-            while (totalCost > 0) {
-                /* Try to reduce the next power of 2 above totalCost because we
-                 * gain back half the rank.
-                 */
-                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
-                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
-                    U32 const highPos = rankLast[nBitsToDecrease];
-                    U32 const lowPos = rankLast[nBitsToDecrease-1];
-                    if (highPos == noSymbol) continue;
-                    /* Decrease highPos if no symbols of lowPos or if it is
-                     * not cheaper to remove 2 lowPos than highPos.
-                     */
-                    if (lowPos == noSymbol) break;
-                    {   U32 const highTotal = huffNode[highPos].count;
-                        U32 const lowTotal = 2 * huffNode[lowPos].count;
-                        if (highTotal <= lowTotal) break;
-                }   }
-                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
-                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
-                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
-                    nBitsToDecrease++;
-                assert(rankLast[nBitsToDecrease] != noSymbol);
-                /* Increase the number of bits to gain back half the rank cost. */
-                totalCost -= 1 << (nBitsToDecrease-1);
-                huffNode[rankLast[nBitsToDecrease]].nbBits++;
-
-                /* Fix up the new rank.
-                 * If the new rank was empty, this symbol is now its smallest.
-                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
-                 */
-                if (rankLast[nBitsToDecrease-1] == noSymbol)
-                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
-                /* Fix up the old rank.
-                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
-                 * it must be the only symbol in its rank, so the old rank now has no symbols.
-                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
-                 * the smallest node in the rank. If the previous position belongs to a different rank,
-                 * then the rank is now empty.
-                 */
-                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
-                    rankLast[nBitsToDecrease] = noSymbol;
-                else {
-                    rankLast[nBitsToDecrease]--;
-                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
-                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
-                }
-            }   /* while (totalCost > 0) */
-
-            /* If we've removed too much weight, then we have to add it back.
-             * To avoid overshooting again, we only adjust the smallest rank.
-             * We take the largest nodes from the lowest rank 0 and move them
-             * to rank 1. There's guaranteed to be enough rank 0 symbols because
-             * TODO.
-             */
-            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
-                /* special case : no rank 1 symbol (using maxNbBits-1);
-                 * let's create one from largest rank 0 (using maxNbBits).
-                 */
-                if (rankLast[1] == noSymbol) {
-                    while (huffNode[n].nbBits == maxNbBits) n--;
-                    huffNode[n+1].nbBits--;
-                    assert(n >= 0);
-                    rankLast[1] = (U32)(n+1);
-                    totalCost++;
-                    continue;
-                }
-                huffNode[ rankLast[1] + 1 ].nbBits--;
-                rankLast[1]++;
-                totalCost ++;
-            }
-        }   /* repay normalized cost */
-    }   /* there are several too large elements (at least >= 2) */
-
-    return maxNbBits;
-}
-
-typedef struct {
-    U16 base;
-    U16 curr;
-} rankPos;
-
-typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
-
-/* Number of buckets available for HUF_sort() */
-#define RANK_POSITION_TABLE_SIZE 192
-
-typedef struct {
-  huffNodeTable huffNodeTbl;
-  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
-} HUF_buildCTable_wksp_tables;
-
-/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
- * Strategy is to use as many buckets as possible for representing distinct
- * counts while using the remainder to represent all "large" counts.
- *
- * To satisfy this requirement for 192 buckets, we can do the following:
- * Let buckets 0-166 represent distinct counts of [0, 166]
- * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
- */
-#define RANK_POSITION_MAX_COUNT_LOG 32
-#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
-#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
-
-/* Return the appropriate bucket index for a given count. See definition of
- * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
- */
-static U32 HUF_getIndex(U32 const count) {
-    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
-        ? count
-        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
-}
-
-/* Helper swap function for HUF_quickSortPartition() */
-static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
-	nodeElt tmp = *a;
-	*a = *b;
-	*b = tmp;
-}
-
-/* Returns 0 if the huffNode array is not sorted by descending count */
-MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
-    U32 i;
-    for (i = 1; i < maxSymbolValue1; ++i) {
-        if (huffNode[i].count > huffNode[i-1].count) {
-            return 0;
-        }
-    }
-    return 1;
-}
-
-/* Insertion sort by descending order */
-HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
-    int i;
-    int const size = high-low+1;
-    huffNode += low;
-    for (i = 1; i < size; ++i) {
-        nodeElt const key = huffNode[i];
-        int j = i - 1;
-        while (j >= 0 && huffNode[j].count < key.count) {
-            huffNode[j + 1] = huffNode[j];
-            j--;
-        }
-        huffNode[j + 1] = key;
-    }
-}
-
-/* Pivot helper function for quicksort. */
-static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
-    /* Simply select rightmost element as pivot. "Better" selectors like
-     * median-of-three don't experimentally appear to have any benefit.
-     */
-    U32 const pivot = arr[high].count;
-    int i = low - 1;
-    int j = low;
-    for ( ; j < high; j++) {
-        if (arr[j].count > pivot) {
-            i++;
-            HUF_swapNodes(&arr[i], &arr[j]);
-        }
-    }
-    HUF_swapNodes(&arr[i + 1], &arr[high]);
-    return i + 1;
-}
-
-/* Classic quicksort by descending with partially iterative calls
- * to reduce worst case callstack size.
- */
-static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
-    int const kInsertionSortThreshold = 8;
-    if (high - low < kInsertionSortThreshold) {
-        HUF_insertionSort(arr, low, high);
-        return;
-    }
-    while (low < high) {
-        int const idx = HUF_quickSortPartition(arr, low, high);
-        if (idx - low < high - idx) {
-            HUF_simpleQuickSort(arr, low, idx - 1);
-            low = idx + 1;
-        } else {
-            HUF_simpleQuickSort(arr, idx + 1, high);
-            high = idx - 1;
-        }
-    }
-}
-
-/**
- * HUF_sort():
- * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
- * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
- *
- * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
- *                            Must have (maxSymbolValue + 1) entries.
- * @param[in]  count          Histogram of the symbols.
- * @param[in]  maxSymbolValue Maximum symbol value.
- * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
- */
-static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
-    U32 n;
-    U32 const maxSymbolValue1 = maxSymbolValue+1;
-
-    /* Compute base and set curr to base.
-     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
-     * See HUF_getIndex to see bucketing strategy.
-     * We attribute each symbol to lowerRank's base value, because we want to know where
-     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
-     */
-    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
-    for (n = 0; n < maxSymbolValue1; ++n) {
-        U32 lowerRank = HUF_getIndex(count[n]);
-        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
-        rankPosition[lowerRank].base++;
-    }
-
-    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
-    /* Set up the rankPosition table */
-    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
-        rankPosition[n-1].base += rankPosition[n].base;
-        rankPosition[n-1].curr = rankPosition[n-1].base;
-    }
-
-    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
-    for (n = 0; n < maxSymbolValue1; ++n) {
-        U32 const c = count[n];
-        U32 const r = HUF_getIndex(c) + 1;
-        U32 const pos = rankPosition[r].curr++;
-        assert(pos < maxSymbolValue1);
-        huffNode[pos].count = c;
-        huffNode[pos].byte  = (BYTE)n;
-    }
-
-    /* Sort each bucket. */
-    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
-        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
-        U32 const bucketStartIdx = rankPosition[n].base;
-        if (bucketSize > 1) {
-            assert(bucketStartIdx < maxSymbolValue1);
-            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
-        }
-    }
-
-    assert(HUF_isSorted(huffNode, maxSymbolValue1));
-}
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
- */
-#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-
-/* HUF_buildTree():
- * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
- *
- * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
- * @param maxSymbolValue  The maximum symbol value.
- * @return                The smallest node in the Huffman tree (by count).
- */
-static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
-{
-    nodeElt* const huffNode0 = huffNode - 1;
-    int nonNullRank;
-    int lowS, lowN;
-    int nodeNb = STARTNODE;
-    int n, nodeRoot;
-    /* init for parents */
-    nonNullRank = (int)maxSymbolValue;
-    while(huffNode[nonNullRank].count == 0) nonNullRank--;
-    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
-    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
-    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
-    nodeNb++; lowS-=2;
-    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
-    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
-
-    /* create parents */
-    while (nodeNb <= nodeRoot) {
-        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
-        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
-        nodeNb++;
-    }
-
-    /* distribute weights (unlimited tree height) */
-    huffNode[nodeRoot].nbBits = 0;
-    for (n=nodeRoot-1; n>=STARTNODE; n--)
-        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
-    for (n=0; n<=nonNullRank; n++)
-        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
-
-    return nonNullRank;
-}
-
-/**
- * HUF_buildCTableFromTree():
- * Build the CTable given the Huffman tree in huffNode.
- *
- * @param[out] CTable         The output Huffman CTable.
- * @param      huffNode       The Huffman tree.
- * @param      nonNullRank    The last and smallest node in the Huffman tree.
- * @param      maxSymbolValue The maximum symbol value.
- * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
- */
-static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
-{
-    HUF_CElt* const ct = CTable + 1;
-    /* fill result into ctable (val, nbBits) */
-    int n;
-    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
-    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
-    int const alphabetSize = (int)(maxSymbolValue + 1);
-    for (n=0; n<=nonNullRank; n++)
-        nbPerRank[huffNode[n].nbBits]++;
-    /* determine starting value per rank */
-    {   U16 min = 0;
-        for (n=(int)maxNbBits; n>0; n--) {
-            valPerRank[n] = min;      /* get starting value within each rank */
-            min += nbPerRank[n];
-            min >>= 1;
-    }   }
-    for (n=0; n<alphabetSize; n++)
-        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
-    for (n=0; n<alphabetSize; n++)
-        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
-    CTable[0] = maxNbBits;
-}
-
-size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
-{
-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
-    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
-    nodeElt* const huffNode = huffNode0+1;
-    int nonNullRank;
-
-    /* safety checks */
-    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
-      return ERROR(workSpace_tooSmall);
-    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-      return ERROR(maxSymbolValue_tooLarge);
-    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
-
-    /* sort, decreasing order */
-    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
-
-    /* build tree */
-    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
-
-    /* enforce maxTableLog */
-    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
-    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
-
-    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
-
-    return maxNbBits;
-}
-
-size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
-{
-    HUF_CElt const* ct = CTable + 1;
-    size_t nbBits = 0;
-    int s;
-    for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        nbBits += HUF_getNbBits(ct[s]) * count[s];
-    }
-    return nbBits >> 3;
-}
-
-int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
-  HUF_CElt const* ct = CTable + 1;
-  int bad = 0;
-  int s;
-  for (s = 0; s <= (int)maxSymbolValue; ++s) {
-    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
-  }
-  return !bad;
-}
-
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
-/** HUF_CStream_t:
- * Huffman uses its own BIT_CStream_t implementation.
- * There are three major differences from BIT_CStream_t:
- *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
- *      the pair (nbBits, value) in the format:
- *      format:
- *        - Bits [0, 4)            = nbBits
- *        - Bits [4, 64 - nbBits)  = 0
- *        - Bits [64 - nbBits, 64) = value
- *   2. The bitContainer is built from the upper bits and
- *      right shifted. E.g. to add a new value of N bits
- *      you right shift the bitContainer by N, then or in
- *      the new value into the N upper bits.
- *   3. The bitstream has two bit containers. You can add
- *      bits to the second container and merge them into
- *      the first container.
- */
-
-#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
-
-typedef struct {
-    size_t bitContainer[2];
-    size_t bitPos[2];
-
-    BYTE* startPtr;
-    BYTE* ptr;
-    BYTE* endPtr;
-} HUF_CStream_t;
-
-/**! HUF_initCStream():
- * Initializes the bitstream.
- * @returns 0 or an error code.
- */
-static size_t HUF_initCStream(HUF_CStream_t* bitC,
-                                  void* startPtr, size_t dstCapacity)
-{
-    ZSTD_memset(bitC, 0, sizeof(*bitC));
-    bitC->startPtr = (BYTE*)startPtr;
-    bitC->ptr = bitC->startPtr;
-    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
-    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
-    return 0;
-}
-
-/*! HUF_addBits():
- * Adds the symbol stored in HUF_CElt elt to the bitstream.
- *
- * @param elt   The element we're adding. This is a (nbBits, value) pair.
- *              See the HUF_CStream_t docs for the format.
- * @param idx   Insert into the bitstream at this idx.
- * @param kFast This is a template parameter. If the bitstream is guaranteed
- *              to have at least 4 unused bits after this call it may be 1,
- *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
- */
-FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
-{
-    assert(idx <= 1);
-    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
-    /* This is efficient on x86-64 with BMI2 because shrx
-     * only reads the low 6 bits of the register. The compiler
-     * knows this and elides the mask. When fast is set,
-     * every operation can use the same value loaded from elt.
-     */
-    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
-    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
-    /* We only read the low 8 bits of bitC->bitPos[idx] so it
-     * doesn't matter that the high bits have noise from the value.
-     */
-    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
-    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
-    /* The last 4-bits of elt are dirty if fast is set,
-     * so we must not be overwriting bits that have already been
-     * inserted into the bit container.
-     */
-#if DEBUGLEVEL >= 1
-    {
-        size_t const nbBits = HUF_getNbBits(elt);
-        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
-        (void)dirtyBits;
-        /* Middle bits are 0. */
-        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
-        /* We didn't overwrite any bits in the bit container. */
-        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
-        (void)dirtyBits;
-    }
-#endif
-}
-
-FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
-{
-    bitC->bitContainer[1] = 0;
-    bitC->bitPos[1] = 0;
-}
-
-/*! HUF_mergeIndex1() :
- * Merges the bit container @ index 1 into the bit container @ index 0
- * and zeros the bit container @ index 1.
- */
-FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
-{
-    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
-    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
-    bitC->bitContainer[0] |= bitC->bitContainer[1];
-    bitC->bitPos[0] += bitC->bitPos[1];
-    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
-}
-
-/*! HUF_flushBits() :
-* Flushes the bits in the bit container @ index 0.
-*
-* @post bitPos will be < 8.
-* @param kFast If kFast is set then we must know a-priori that
-*              the bit container will not overflow.
-*/
-FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
-{
-    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
-    size_t const nbBits = bitC->bitPos[0] & 0xFF;
-    size_t const nbBytes = nbBits >> 3;
-    /* The top nbBits bits of bitContainer are the ones we need. */
-    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
-    /* Mask bitPos to account for the bytes we consumed. */
-    bitC->bitPos[0] &= 7;
-    assert(nbBits > 0);
-    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
-    assert(bitC->ptr <= bitC->endPtr);
-    MEM_writeLEST(bitC->ptr, bitContainer);
-    bitC->ptr += nbBytes;
-    assert(!kFast || bitC->ptr <= bitC->endPtr);
-    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
-    /* bitContainer doesn't need to be modified because the leftover
-     * bits are already the top bitPos bits. And we don't care about
-     * noise in the lower values.
-     */
-}
-
-/*! HUF_endMark()
- * @returns The Huffman stream end mark: A 1-bit value = 1.
- */
-static HUF_CElt HUF_endMark(void)
-{
-    HUF_CElt endMark;
-    HUF_setNbBits(&endMark, 1);
-    HUF_setValue(&endMark, 1);
-    return endMark;
-}
-
-/*! HUF_closeCStream() :
- *  @return Size of CStream, in bytes,
- *          or 0 if it could not fit into dstBuffer */
-static size_t HUF_closeCStream(HUF_CStream_t* bitC)
-{
-    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
-    HUF_flushBits(bitC, /* kFast */ 0);
-    {
-        size_t const nbBits = bitC->bitPos[0] & 0xFF;
-        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
-        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
-    }
-}
-
-FORCE_INLINE_TEMPLATE void
-HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
-{
-    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
-}
-
-FORCE_INLINE_TEMPLATE void
-HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
-                                   const BYTE* ip, size_t srcSize,
-                                   const HUF_CElt* ct,
-                                   int kUnroll, int kFastFlush, int kLastFast)
-{
-    /* Join to kUnroll */
-    int n = (int)srcSize;
-    int rem = n % kUnroll;
-    if (rem > 0) {
-        for (; rem > 0; --rem) {
-            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
-        }
-        HUF_flushBits(bitC, kFastFlush);
-    }
-    assert(n % kUnroll == 0);
-
-    /* Join to 2 * kUnroll */
-    if (n % (2 * kUnroll)) {
-        int u;
-        for (u = 1; u < kUnroll; ++u) {
-            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
-        }
-        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
-        HUF_flushBits(bitC, kFastFlush);
-        n -= kUnroll;
-    }
-    assert(n % (2 * kUnroll) == 0);
-
-    for (; n>0; n-= 2 * kUnroll) {
-        /* Encode kUnroll symbols into the bitstream @ index 0. */
-        int u;
-        for (u = 1; u < kUnroll; ++u) {
-            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
-        }
-        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
-        HUF_flushBits(bitC, kFastFlush);
-        /* Encode kUnroll symbols into the bitstream @ index 1.
-         * This allows us to start filling the bit container
-         * without any data dependencies.
-         */
-        HUF_zeroIndex1(bitC);
-        for (u = 1; u < kUnroll; ++u) {
-            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
-        }
-        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
-        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
-        HUF_mergeIndex1(bitC);
-        HUF_flushBits(bitC, kFastFlush);
-    }
-    assert(n == 0);
-
-}
-
-/**
- * Returns a tight upper bound on the output space needed by Huffman
- * with 8 bytes buffer to handle over-writes. If the output is at least
- * this large we don't need to do bounds checks during Huffman encoding.
- */
-static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
-{
-    return ((srcSize * tableLog) >> 3) + 8;
-}
-
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
-                                   const void* src, size_t srcSize,
-                                   const HUF_CElt* CTable)
-{
-    U32 const tableLog = (U32)CTable[0];
-    HUF_CElt const* ct = CTable + 1;
-    const BYTE* ip = (const BYTE*) src;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-    HUF_CStream_t bitC;
-
-    /* init */
-    if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
-      if (HUF_isError(initErr)) return 0; }
-
-    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
-        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
-    else {
-        if (MEM_32bits()) {
-            switch (tableLog) {
-            case 11:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
-                break;
-            case 10: ZSTD_FALLTHROUGH;
-            case 9: ZSTD_FALLTHROUGH;
-            case 8:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
-                break;
-            case 7: ZSTD_FALLTHROUGH;
-            default:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
-                break;
-            }
-        } else {
-            switch (tableLog) {
-            case 11:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
-                break;
-            case 10:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
-                break;
-            case 9:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
-                break;
-            case 8:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
-                break;
-            case 7:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
-                break;
-            case 6: ZSTD_FALLTHROUGH;
-            default:
-                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
-                break;
-            }
-        }
-    }
-    assert(bitC.ptr <= bitC.endPtr);
-
-    return HUF_closeCStream(&bitC);
-}
-
-#if DYNAMIC_BMI2
-
-static BMI2_TARGET_ATTRIBUTE size_t
-HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
-                                   const void* src, size_t srcSize,
-                                   const HUF_CElt* CTable)
-{
-    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
-}
-
-static size_t
-HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
-                                      const void* src, size_t srcSize,
-                                      const HUF_CElt* CTable)
-{
-    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
-}
-
-static size_t
-HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
-                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, const int bmi2)
-{
-    if (bmi2) {
-        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
-    }
-    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
-}
-
-#else
-
-static size_t
-HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
-                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, const int bmi2)
-{
-    (void)bmi2;
-    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
-}
-
-#endif
-
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-{
-    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
-}
-
-size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
-{
-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
-}
-
-static size_t
-HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
-                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, int bmi2)
-{
-    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* const iend = ip + srcSize;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-
-    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
-    if (srcSize < 12) return 0;   /* no saving possible : too small input */
-    op += 6;   /* jumpTable */
-
-    assert(op <= oend);
-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize == 0 || cSize > 65535) return 0;
-        MEM_writeLE16(ostart, (U16)cSize);
-        op += cSize;
-    }
-
-    ip += segmentSize;
-    assert(op <= oend);
-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize == 0 || cSize > 65535) return 0;
-        MEM_writeLE16(ostart+2, (U16)cSize);
-        op += cSize;
-    }
-
-    ip += segmentSize;
-    assert(op <= oend);
-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize == 0 || cSize > 65535) return 0;
-        MEM_writeLE16(ostart+4, (U16)cSize);
-        op += cSize;
-    }
-
-    ip += segmentSize;
-    assert(op <= oend);
-    assert(ip <= iend);
-    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-        if (cSize == 0 || cSize > 65535) return 0;
-        op += cSize;
-    }
-
-    return (size_t)(op-ostart);
-}
-
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
-{
-    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
-}
-
-size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
-{
-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
-}
-
-typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
-
-static size_t HUF_compressCTable_internal(
-                BYTE* const ostart, BYTE* op, BYTE* const oend,
-                const void* src, size_t srcSize,
-                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
-{
-    size_t const cSize = (nbStreams==HUF_singleStream) ?
-                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
-                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
-    if (HUF_isError(cSize)) { return cSize; }
-    if (cSize==0) { return 0; }   /* uncompressible */
-    op += cSize;
-    /* check compressibility */
-    assert(op >= ostart);
-    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
-    return (size_t)(op-ostart);
-}
-
-typedef struct {
-    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
-    union {
-        HUF_buildCTable_wksp_tables buildCTable_wksp;
-        HUF_WriteCTableWksp writeCTable_wksp;
-        U32 hist_wksp[HIST_WKSP_SIZE_U32];
-    } wksps;
-} HUF_compress_tables_t;
-
-#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
-#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
-
-/* HUF_compress_internal() :
- * `workSpace_align4` must be aligned on 4-bytes boundaries,
- * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
-static size_t
-HUF_compress_internal (void* dst, size_t dstSize,
-                 const void* src, size_t srcSize,
-                       unsigned maxSymbolValue, unsigned huffLog,
-                       HUF_nbStreams_e nbStreams,
-                       void* workSpace, size_t wkspSize,
-                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
-                 const int bmi2, unsigned suspectUncompressible)
-{
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
-
-    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
-
-    /* checks & inits */
-    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
-    if (!srcSize) return 0;  /* Uncompressed */
-    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
-    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
-    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
-    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
-
-    /* Heuristic : If old table is valid, use it for small inputs */
-    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-        return HUF_compressCTable_internal(ostart, op, oend,
-                                           src, srcSize,
-                                           nbStreams, oldHufTable, bmi2);
-    }
-
-    /* If uncompressible data is suspected, do a smaller sampling first */
-    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
-    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
-        size_t largestTotal = 0;
-        {   unsigned maxSymbolValueBegin = maxSymbolValue;
-            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
-            largestTotal += largestBegin;
-        }
-        {   unsigned maxSymbolValueEnd = maxSymbolValue;
-            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
-            largestTotal += largestEnd;
-        }
-        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
-    }
-
-    /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
-        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
-        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
-    }
-
-    /* Check validity of previous table */
-    if ( repeat
-      && *repeat == HUF_repeat_check
-      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
-        *repeat = HUF_repeat_none;
-    }
-    /* Heuristic : use existing table for small inputs */
-    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-        return HUF_compressCTable_internal(ostart, op, oend,
-                                           src, srcSize,
-                                           nbStreams, oldHufTable, bmi2);
-    }
-
-    /* Build Huffman Tree */
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
-                                            maxSymbolValue, huffLog,
-                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
-        CHECK_F(maxBits);
-        huffLog = (U32)maxBits;
-    }
-    /* Zero unused symbols in CTable, so we can check it for validity */
-    {
-        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
-        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
-        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
-    }
-
-    /* Write table description header */
-    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
-                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
-        /* Check if using previous huffman table is beneficial */
-        if (repeat && *repeat != HUF_repeat_none) {
-            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
-            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
-            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-                return HUF_compressCTable_internal(ostart, op, oend,
-                                                   src, srcSize,
-                                                   nbStreams, oldHufTable, bmi2);
-        }   }
-
-        /* Use the new huffman table */
-        if (hSize + 12ul >= srcSize) { return 0; }
-        op += hSize;
-        if (repeat) { *repeat = HUF_repeat_none; }
-        if (oldHufTable)
-            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
-    }
-    return HUF_compressCTable_internal(ostart, op, oend,
-                                       src, srcSize,
-                                       nbStreams, table->CTable, bmi2);
-}
-
-
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
-                      const void* src, size_t srcSize,
-                      unsigned maxSymbolValue, unsigned huffLog,
-                      void* workSpace, size_t wkspSize)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                 maxSymbolValue, huffLog, HUF_singleStream,
-                                 workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
-}
-
-size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
-                      const void* src, size_t srcSize,
-                      unsigned maxSymbolValue, unsigned huffLog,
-                      void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
-                      int bmi2, unsigned suspectUncompressible)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                 maxSymbolValue, huffLog, HUF_singleStream,
-                                 workSpace, wkspSize, hufTable,
-                                 repeat, preferRepeat, bmi2, suspectUncompressible);
-}
-
-/* HUF_compress4X_repeat():
- * compress input using 4 streams.
- * provide workspace to generate compression tables */
-size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
-                      const void* src, size_t srcSize,
-                      unsigned maxSymbolValue, unsigned huffLog,
-                      void* workSpace, size_t wkspSize)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                 maxSymbolValue, huffLog, HUF_fourStreams,
-                                 workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
-}
-
-/* HUF_compress4X_repeat():
- * compress input using 4 streams.
- * consider skipping quickly
- * re-use an existing huffman compression table */
-size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
-                      const void* src, size_t srcSize,
-                      unsigned maxSymbolValue, unsigned huffLog,
-                      void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
-{
-    return HUF_compress_internal(dst, dstSize, src, srcSize,
-                                 maxSymbolValue, huffLog, HUF_fourStreams,
-                                 workSpace, wkspSize,
-                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
-}
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-/** HUF_buildCTable() :
- * @return : maxNbBits
- *  Note : count is used before tree is written, so they can safely overlap
- */
-size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
-{
-    HUF_buildCTable_wksp_tables workspace;
-    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
-}
-
-size_t HUF_compress1X (void* dst, size_t dstSize,
-                 const void* src, size_t srcSize,
-                 unsigned maxSymbolValue, unsigned huffLog)
-{
-    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
-    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
-}
-
-size_t HUF_compress2 (void* dst, size_t dstSize,
-                const void* src, size_t srcSize,
-                unsigned maxSymbolValue, unsigned huffLog)
-{
-    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
-    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
-}
-
-size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
-{
-    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
-}
-#endif
diff --git a/dep/zstd/lib/compress/zstd_compress.c b/dep/zstd/lib/compress/zstd_compress.c
deleted file mode 100644
index f06456af9..000000000
--- a/dep/zstd/lib/compress/zstd_compress.c
+++ /dev/null
@@ -1,6327 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
-#include "../common/mem.h"
-#include "hist.h"           /* HIST_countFast_wksp */
-#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
-#include "../common/fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "zstd_compress_internal.h"
-#include "zstd_compress_sequences.h"
-#include "zstd_compress_literals.h"
-#include "zstd_fast.h"
-#include "zstd_double_fast.h"
-#include "zstd_lazy.h"
-#include "zstd_opt.h"
-#include "zstd_ldm.h"
-#include "zstd_compress_superblock.h"
-
-/* ***************************************************************
-*  Tuning parameters
-*****************************************************************/
-/*!
- * COMPRESS_HEAPMODE :
- * Select how default decompression function ZSTD_compress() allocates its context,
- * on stack (0, default), or into heap (1).
- * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
- */
-#ifndef ZSTD_COMPRESS_HEAPMODE
-#  define ZSTD_COMPRESS_HEAPMODE 0
-#endif
-
-/*!
- * ZSTD_HASHLOG3_MAX :
- * Maximum size of the hash table dedicated to find 3-bytes matches,
- * in log format, aka 17 => 1 << 17 == 128Ki positions.
- * This structure is only used in zstd_opt.
- * Since allocation is centralized for all strategies, it has to be known here.
- * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
- * so that zstd_opt.c doesn't need to know about this constant.
- */
-#ifndef ZSTD_HASHLOG3_MAX
-#  define ZSTD_HASHLOG3_MAX 17
-#endif
-
-/*-*************************************
-*  Helper functions
-***************************************/
-/* ZSTD_compressBound()
- * Note that the result from this function is only compatible with the "normal"
- * full-block strategy.
- * When there are a lot of small blocks due to frequent flush in streaming mode
- * the overhead of headers can make the compressed data to be larger than the
- * return value of ZSTD_compressBound().
- */
-size_t ZSTD_compressBound(size_t srcSize) {
-    return ZSTD_COMPRESSBOUND(srcSize);
-}
-
-
-/*-*************************************
-*  Context memory management
-***************************************/
-struct ZSTD_CDict_s {
-    const void* dictContent;
-    size_t dictContentSize;
-    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
-    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
-    ZSTD_cwksp workspace;
-    ZSTD_matchState_t matchState;
-    ZSTD_compressedBlockState_t cBlockState;
-    ZSTD_customMem customMem;
-    U32 dictID;
-    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
-    ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
-                                           * row-based matchfinder. Unless the cdict is reloaded, we will use
-                                           * the same greedy/lazy matchfinder at compression time.
-                                           */
-};  /* typedef'd to ZSTD_CDict within "zstd.h" */
-
-ZSTD_CCtx* ZSTD_createCCtx(void)
-{
-    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
-}
-
-static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
-{
-    assert(cctx != NULL);
-    ZSTD_memset(cctx, 0, sizeof(*cctx));
-    cctx->customMem = memManager;
-    cctx->bmi2 = ZSTD_cpuSupportsBmi2();
-    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
-        assert(!ZSTD_isError(err));
-        (void)err;
-    }
-}
-
-ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
-{
-    ZSTD_STATIC_ASSERT(zcss_init==0);
-    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
-    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
-    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
-        if (!cctx) return NULL;
-        ZSTD_initCCtx(cctx, customMem);
-        return cctx;
-    }
-}
-
-ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
-{
-    ZSTD_cwksp ws;
-    ZSTD_CCtx* cctx;
-    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
-    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
-    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
-
-    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
-    if (cctx == NULL) return NULL;
-
-    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
-    ZSTD_cwksp_move(&cctx->workspace, &ws);
-    cctx->staticSize = workspaceSize;
-
-    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
-    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
-    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
-    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
-    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
-    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
-    return cctx;
-}
-
-/**
- * Clears and frees all of the dictionaries in the CCtx.
- */
-static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
-{
-    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
-    ZSTD_freeCDict(cctx->localDict.cdict);
-    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
-    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
-    cctx->cdict = NULL;
-}
-
-static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
-{
-    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
-    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
-    return bufferSize + cdictSize;
-}
-
-static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
-{
-    assert(cctx != NULL);
-    assert(cctx->staticSize == 0);
-    ZSTD_clearAllDicts(cctx);
-#ifdef ZSTD_MULTITHREAD
-    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
-#endif
-    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
-}
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
-{
-    if (cctx==NULL) return 0;   /* support free on NULL */
-    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
-                    "not compatible with static CCtx");
-    {
-        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
-        ZSTD_freeCCtxContent(cctx);
-        if (!cctxInWorkspace) {
-            ZSTD_customFree(cctx, cctx->customMem);
-        }
-    }
-    return 0;
-}
-
-
-static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
-{
-#ifdef ZSTD_MULTITHREAD
-    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
-#else
-    (void)cctx;
-    return 0;
-#endif
-}
-
-
-size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
-{
-    if (cctx==NULL) return 0;   /* support sizeof on NULL */
-    /* cctx may be in the workspace */
-    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
-           + ZSTD_cwksp_sizeof(&cctx->workspace)
-           + ZSTD_sizeof_localDict(cctx->localDict)
-           + ZSTD_sizeof_mtctx(cctx);
-}
-
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
-{
-    return ZSTD_sizeof_CCtx(zcs);  /* same object */
-}
-
-/* private API call, for dictBuilder only */
-const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
-
-/* Returns true if the strategy supports using a row based matchfinder */
-static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
-    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
-}
-
-/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
- * for this compression.
- */
-static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
-    assert(mode != ZSTD_ps_auto);
-    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
-}
-
-/* Returns row matchfinder usage given an initial mode and cParams */
-static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
-                                                         const ZSTD_compressionParameters* const cParams) {
-#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
-    int const kHasSIMD128 = 1;
-#else
-    int const kHasSIMD128 = 0;
-#endif
-    if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
-    mode = ZSTD_ps_disable;
-    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
-    if (kHasSIMD128) {
-        if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
-    } else {
-        if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
-    }
-    return mode;
-}
-
-/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
-static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
-                                                        const ZSTD_compressionParameters* const cParams) {
-    if (mode != ZSTD_ps_auto) return mode;
-    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
-}
-
-/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
-static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
-                                   const ZSTD_paramSwitch_e useRowMatchFinder,
-                                   const U32 forDDSDict) {
-    assert(useRowMatchFinder != ZSTD_ps_auto);
-    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
-     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
-     */
-    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
-}
-
-/* Returns 1 if compression parameters are such that we should
- * enable long distance matching (wlog >= 27, strategy >= btopt).
- * Returns 0 otherwise.
- */
-static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
-                                 const ZSTD_compressionParameters* const cParams) {
-    if (mode != ZSTD_ps_auto) return mode;
-    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
-}
-
-static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
-        ZSTD_compressionParameters cParams)
-{
-    ZSTD_CCtx_params cctxParams;
-    /* should not matter, as all cParams are presumed properly defined */
-    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
-    cctxParams.cParams = cParams;
-
-    /* Adjust advanced params according to cParams */
-    cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
-    if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
-        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
-        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
-        assert(cctxParams.ldmParams.hashRateLog < 32);
-    }
-    cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
-    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
-    assert(!ZSTD_checkCParams(cParams));
-    return cctxParams;
-}
-
-static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
-        ZSTD_customMem customMem)
-{
-    ZSTD_CCtx_params* params;
-    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
-    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
-            sizeof(ZSTD_CCtx_params), customMem);
-    if (!params) { return NULL; }
-    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
-    params->customMem = customMem;
-    return params;
-}
-
-ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
-{
-    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
-}
-
-size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
-{
-    if (params == NULL) { return 0; }
-    ZSTD_customFree(params, params->customMem);
-    return 0;
-}
-
-size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
-{
-    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
-}
-
-size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
-    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
-    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
-    cctxParams->compressionLevel = compressionLevel;
-    cctxParams->fParams.contentSizeFlag = 1;
-    return 0;
-}
-
-#define ZSTD_NO_CLEVEL 0
-
-/**
- * Initializes the cctxParams from params and compressionLevel.
- * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
- */
-static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
-{
-    assert(!ZSTD_checkCParams(params->cParams));
-    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
-    cctxParams->cParams = params->cParams;
-    cctxParams->fParams = params->fParams;
-    /* Should not matter, as all cParams are presumed properly defined.
-     * But, set it for tracing anyway.
-     */
-    cctxParams->compressionLevel = compressionLevel;
-    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
-    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
-    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
-    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
-                cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
-}
-
-size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
-{
-    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
-    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
-    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
-    return 0;
-}
-
-/**
- * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
- * @param param Validated zstd parameters.
- */
-static void ZSTD_CCtxParams_setZstdParams(
-        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
-{
-    assert(!ZSTD_checkCParams(params->cParams));
-    cctxParams->cParams = params->cParams;
-    cctxParams->fParams = params->fParams;
-    /* Should not matter, as all cParams are presumed properly defined.
-     * But, set it for tracing anyway.
-     */
-    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
-}
-
-ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
-{
-    ZSTD_bounds bounds = { 0, 0, 0 };
-
-    switch(param)
-    {
-    case ZSTD_c_compressionLevel:
-        bounds.lowerBound = ZSTD_minCLevel();
-        bounds.upperBound = ZSTD_maxCLevel();
-        return bounds;
-
-    case ZSTD_c_windowLog:
-        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
-        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
-        return bounds;
-
-    case ZSTD_c_hashLog:
-        bounds.lowerBound = ZSTD_HASHLOG_MIN;
-        bounds.upperBound = ZSTD_HASHLOG_MAX;
-        return bounds;
-
-    case ZSTD_c_chainLog:
-        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
-        bounds.upperBound = ZSTD_CHAINLOG_MAX;
-        return bounds;
-
-    case ZSTD_c_searchLog:
-        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
-        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
-        return bounds;
-
-    case ZSTD_c_minMatch:
-        bounds.lowerBound = ZSTD_MINMATCH_MIN;
-        bounds.upperBound = ZSTD_MINMATCH_MAX;
-        return bounds;
-
-    case ZSTD_c_targetLength:
-        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
-        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
-        return bounds;
-
-    case ZSTD_c_strategy:
-        bounds.lowerBound = ZSTD_STRATEGY_MIN;
-        bounds.upperBound = ZSTD_STRATEGY_MAX;
-        return bounds;
-
-    case ZSTD_c_contentSizeFlag:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_checksumFlag:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_dictIDFlag:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_nbWorkers:
-        bounds.lowerBound = 0;
-#ifdef ZSTD_MULTITHREAD
-        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
-#else
-        bounds.upperBound = 0;
-#endif
-        return bounds;
-
-    case ZSTD_c_jobSize:
-        bounds.lowerBound = 0;
-#ifdef ZSTD_MULTITHREAD
-        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
-#else
-        bounds.upperBound = 0;
-#endif
-        return bounds;
-
-    case ZSTD_c_overlapLog:
-#ifdef ZSTD_MULTITHREAD
-        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
-        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
-#else
-        bounds.lowerBound = 0;
-        bounds.upperBound = 0;
-#endif
-        return bounds;
-
-    case ZSTD_c_enableDedicatedDictSearch:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_enableLongDistanceMatching:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_ldmHashLog:
-        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
-        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
-        return bounds;
-
-    case ZSTD_c_ldmMinMatch:
-        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
-        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
-        return bounds;
-
-    case ZSTD_c_ldmBucketSizeLog:
-        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
-        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
-        return bounds;
-
-    case ZSTD_c_ldmHashRateLog:
-        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
-        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
-        return bounds;
-
-    /* experimental parameters */
-    case ZSTD_c_rsyncable:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_forceMaxWindow :
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_format:
-        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
-        bounds.lowerBound = ZSTD_f_zstd1;
-        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
-        return bounds;
-
-    case ZSTD_c_forceAttachDict:
-        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
-        bounds.lowerBound = ZSTD_dictDefaultAttach;
-        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
-        return bounds;
-
-    case ZSTD_c_literalCompressionMode:
-        ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
-        bounds.lowerBound = (int)ZSTD_ps_auto;
-        bounds.upperBound = (int)ZSTD_ps_disable;
-        return bounds;
-
-    case ZSTD_c_targetCBlockSize:
-        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
-        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
-        return bounds;
-
-    case ZSTD_c_srcSizeHint:
-        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
-        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
-        return bounds;
-
-    case ZSTD_c_stableInBuffer:
-    case ZSTD_c_stableOutBuffer:
-        bounds.lowerBound = (int)ZSTD_bm_buffered;
-        bounds.upperBound = (int)ZSTD_bm_stable;
-        return bounds;
-
-    case ZSTD_c_blockDelimiters:
-        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
-        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
-        return bounds;
-
-    case ZSTD_c_validateSequences:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    case ZSTD_c_useBlockSplitter:
-        bounds.lowerBound = (int)ZSTD_ps_auto;
-        bounds.upperBound = (int)ZSTD_ps_disable;
-        return bounds;
-
-    case ZSTD_c_useRowMatchFinder:
-        bounds.lowerBound = (int)ZSTD_ps_auto;
-        bounds.upperBound = (int)ZSTD_ps_disable;
-        return bounds;
-
-    case ZSTD_c_deterministicRefPrefix:
-        bounds.lowerBound = 0;
-        bounds.upperBound = 1;
-        return bounds;
-
-    default:
-        bounds.error = ERROR(parameter_unsupported);
-        return bounds;
-    }
-}
-
-/* ZSTD_cParam_clampBounds:
- * Clamps the value into the bounded range.
- */
-static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
-{
-    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
-    if (ZSTD_isError(bounds.error)) return bounds.error;
-    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
-    if (*value > bounds.upperBound) *value = bounds.upperBound;
-    return 0;
-}
-
-#define BOUNDCHECK(cParam, val) { \
-    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
-                    parameter_outOfBound, "Param out of bounds"); \
-}
-
-
-static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
-{
-    switch(param)
-    {
-    case ZSTD_c_compressionLevel:
-    case ZSTD_c_hashLog:
-    case ZSTD_c_chainLog:
-    case ZSTD_c_searchLog:
-    case ZSTD_c_minMatch:
-    case ZSTD_c_targetLength:
-    case ZSTD_c_strategy:
-        return 1;
-
-    case ZSTD_c_format:
-    case ZSTD_c_windowLog:
-    case ZSTD_c_contentSizeFlag:
-    case ZSTD_c_checksumFlag:
-    case ZSTD_c_dictIDFlag:
-    case ZSTD_c_forceMaxWindow :
-    case ZSTD_c_nbWorkers:
-    case ZSTD_c_jobSize:
-    case ZSTD_c_overlapLog:
-    case ZSTD_c_rsyncable:
-    case ZSTD_c_enableDedicatedDictSearch:
-    case ZSTD_c_enableLongDistanceMatching:
-    case ZSTD_c_ldmHashLog:
-    case ZSTD_c_ldmMinMatch:
-    case ZSTD_c_ldmBucketSizeLog:
-    case ZSTD_c_ldmHashRateLog:
-    case ZSTD_c_forceAttachDict:
-    case ZSTD_c_literalCompressionMode:
-    case ZSTD_c_targetCBlockSize:
-    case ZSTD_c_srcSizeHint:
-    case ZSTD_c_stableInBuffer:
-    case ZSTD_c_stableOutBuffer:
-    case ZSTD_c_blockDelimiters:
-    case ZSTD_c_validateSequences:
-    case ZSTD_c_useBlockSplitter:
-    case ZSTD_c_useRowMatchFinder:
-    case ZSTD_c_deterministicRefPrefix:
-    default:
-        return 0;
-    }
-}
-
-size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
-{
-    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
-    if (cctx->streamStage != zcss_init) {
-        if (ZSTD_isUpdateAuthorized(param)) {
-            cctx->cParamsChanged = 1;
-        } else {
-            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
-    }   }
-
-    switch(param)
-    {
-    case ZSTD_c_nbWorkers:
-        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
-                        "MT not compatible with static alloc");
-        break;
-
-    case ZSTD_c_compressionLevel:
-    case ZSTD_c_windowLog:
-    case ZSTD_c_hashLog:
-    case ZSTD_c_chainLog:
-    case ZSTD_c_searchLog:
-    case ZSTD_c_minMatch:
-    case ZSTD_c_targetLength:
-    case ZSTD_c_strategy:
-    case ZSTD_c_ldmHashRateLog:
-    case ZSTD_c_format:
-    case ZSTD_c_contentSizeFlag:
-    case ZSTD_c_checksumFlag:
-    case ZSTD_c_dictIDFlag:
-    case ZSTD_c_forceMaxWindow:
-    case ZSTD_c_forceAttachDict:
-    case ZSTD_c_literalCompressionMode:
-    case ZSTD_c_jobSize:
-    case ZSTD_c_overlapLog:
-    case ZSTD_c_rsyncable:
-    case ZSTD_c_enableDedicatedDictSearch:
-    case ZSTD_c_enableLongDistanceMatching:
-    case ZSTD_c_ldmHashLog:
-    case ZSTD_c_ldmMinMatch:
-    case ZSTD_c_ldmBucketSizeLog:
-    case ZSTD_c_targetCBlockSize:
-    case ZSTD_c_srcSizeHint:
-    case ZSTD_c_stableInBuffer:
-    case ZSTD_c_stableOutBuffer:
-    case ZSTD_c_blockDelimiters:
-    case ZSTD_c_validateSequences:
-    case ZSTD_c_useBlockSplitter:
-    case ZSTD_c_useRowMatchFinder:
-    case ZSTD_c_deterministicRefPrefix:
-        break;
-
-    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-    }
-    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
-}
-
-size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
-                                    ZSTD_cParameter param, int value)
-{
-    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
-    switch(param)
-    {
-    case ZSTD_c_format :
-        BOUNDCHECK(ZSTD_c_format, value);
-        CCtxParams->format = (ZSTD_format_e)value;
-        return (size_t)CCtxParams->format;
-
-    case ZSTD_c_compressionLevel : {
-        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
-        if (value == 0)
-            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
-        else
-            CCtxParams->compressionLevel = value;
-        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
-        return 0;  /* return type (size_t) cannot represent negative values */
-    }
-
-    case ZSTD_c_windowLog :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_windowLog, value);
-        CCtxParams->cParams.windowLog = (U32)value;
-        return CCtxParams->cParams.windowLog;
-
-    case ZSTD_c_hashLog :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_hashLog, value);
-        CCtxParams->cParams.hashLog = (U32)value;
-        return CCtxParams->cParams.hashLog;
-
-    case ZSTD_c_chainLog :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_chainLog, value);
-        CCtxParams->cParams.chainLog = (U32)value;
-        return CCtxParams->cParams.chainLog;
-
-    case ZSTD_c_searchLog :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_searchLog, value);
-        CCtxParams->cParams.searchLog = (U32)value;
-        return (size_t)value;
-
-    case ZSTD_c_minMatch :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_minMatch, value);
-        CCtxParams->cParams.minMatch = value;
-        return CCtxParams->cParams.minMatch;
-
-    case ZSTD_c_targetLength :
-        BOUNDCHECK(ZSTD_c_targetLength, value);
-        CCtxParams->cParams.targetLength = value;
-        return CCtxParams->cParams.targetLength;
-
-    case ZSTD_c_strategy :
-        if (value!=0)   /* 0 => use default */
-            BOUNDCHECK(ZSTD_c_strategy, value);
-        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
-        return (size_t)CCtxParams->cParams.strategy;
-
-    case ZSTD_c_contentSizeFlag :
-        /* Content size written in frame header _when known_ (default:1) */
-        DEBUGLOG(4, "set content size flag = %u", (value!=0));
-        CCtxParams->fParams.contentSizeFlag = value != 0;
-        return CCtxParams->fParams.contentSizeFlag;
-
-    case ZSTD_c_checksumFlag :
-        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
-        CCtxParams->fParams.checksumFlag = value != 0;
-        return CCtxParams->fParams.checksumFlag;
-
-    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
-        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
-        CCtxParams->fParams.noDictIDFlag = !value;
-        return !CCtxParams->fParams.noDictIDFlag;
-
-    case ZSTD_c_forceMaxWindow :
-        CCtxParams->forceWindow = (value != 0);
-        return CCtxParams->forceWindow;
-
-    case ZSTD_c_forceAttachDict : {
-        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
-        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
-        CCtxParams->attachDictPref = pref;
-        return CCtxParams->attachDictPref;
-    }
-
-    case ZSTD_c_literalCompressionMode : {
-        const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
-        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
-        CCtxParams->literalCompressionMode = lcm;
-        return CCtxParams->literalCompressionMode;
-    }
-
-    case ZSTD_c_nbWorkers :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
-        return 0;
-#else
-        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
-        CCtxParams->nbWorkers = value;
-        return CCtxParams->nbWorkers;
-#endif
-
-    case ZSTD_c_jobSize :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
-        return 0;
-#else
-        /* Adjust to the minimum non-default value. */
-        if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
-            value = ZSTDMT_JOBSIZE_MIN;
-        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
-        assert(value >= 0);
-        CCtxParams->jobSize = value;
-        return CCtxParams->jobSize;
-#endif
-
-    case ZSTD_c_overlapLog :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
-        return 0;
-#else
-        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
-        CCtxParams->overlapLog = value;
-        return CCtxParams->overlapLog;
-#endif
-
-    case ZSTD_c_rsyncable :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
-        return 0;
-#else
-        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
-        CCtxParams->rsyncable = value;
-        return CCtxParams->rsyncable;
-#endif
-
-    case ZSTD_c_enableDedicatedDictSearch :
-        CCtxParams->enableDedicatedDictSearch = (value!=0);
-        return CCtxParams->enableDedicatedDictSearch;
-
-    case ZSTD_c_enableLongDistanceMatching :
-        CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
-        return CCtxParams->ldmParams.enableLdm;
-
-    case ZSTD_c_ldmHashLog :
-        if (value!=0)   /* 0 ==> auto */
-            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
-        CCtxParams->ldmParams.hashLog = value;
-        return CCtxParams->ldmParams.hashLog;
-
-    case ZSTD_c_ldmMinMatch :
-        if (value!=0)   /* 0 ==> default */
-            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
-        CCtxParams->ldmParams.minMatchLength = value;
-        return CCtxParams->ldmParams.minMatchLength;
-
-    case ZSTD_c_ldmBucketSizeLog :
-        if (value!=0)   /* 0 ==> default */
-            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
-        CCtxParams->ldmParams.bucketSizeLog = value;
-        return CCtxParams->ldmParams.bucketSizeLog;
-
-    case ZSTD_c_ldmHashRateLog :
-        if (value!=0)   /* 0 ==> default */
-            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
-        CCtxParams->ldmParams.hashRateLog = value;
-        return CCtxParams->ldmParams.hashRateLog;
-
-    case ZSTD_c_targetCBlockSize :
-        if (value!=0)   /* 0 ==> default */
-            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
-        CCtxParams->targetCBlockSize = value;
-        return CCtxParams->targetCBlockSize;
-
-    case ZSTD_c_srcSizeHint :
-        if (value!=0)    /* 0 ==> default */
-            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
-        CCtxParams->srcSizeHint = value;
-        return CCtxParams->srcSizeHint;
-
-    case ZSTD_c_stableInBuffer:
-        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
-        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
-        return CCtxParams->inBufferMode;
-
-    case ZSTD_c_stableOutBuffer:
-        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
-        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
-        return CCtxParams->outBufferMode;
-
-    case ZSTD_c_blockDelimiters:
-        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
-        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
-        return CCtxParams->blockDelimiters;
-
-    case ZSTD_c_validateSequences:
-        BOUNDCHECK(ZSTD_c_validateSequences, value);
-        CCtxParams->validateSequences = value;
-        return CCtxParams->validateSequences;
-
-    case ZSTD_c_useBlockSplitter:
-        BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
-        CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
-        return CCtxParams->useBlockSplitter;
-
-    case ZSTD_c_useRowMatchFinder:
-        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
-        CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
-        return CCtxParams->useRowMatchFinder;
-
-    case ZSTD_c_deterministicRefPrefix:
-        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
-        CCtxParams->deterministicRefPrefix = !!value;
-        return CCtxParams->deterministicRefPrefix;
-
-    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-    }
-}
-
-size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
-{
-    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
-}
-
-size_t ZSTD_CCtxParams_getParameter(
-        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
-{
-    switch(param)
-    {
-    case ZSTD_c_format :
-        *value = CCtxParams->format;
-        break;
-    case ZSTD_c_compressionLevel :
-        *value = CCtxParams->compressionLevel;
-        break;
-    case ZSTD_c_windowLog :
-        *value = (int)CCtxParams->cParams.windowLog;
-        break;
-    case ZSTD_c_hashLog :
-        *value = (int)CCtxParams->cParams.hashLog;
-        break;
-    case ZSTD_c_chainLog :
-        *value = (int)CCtxParams->cParams.chainLog;
-        break;
-    case ZSTD_c_searchLog :
-        *value = CCtxParams->cParams.searchLog;
-        break;
-    case ZSTD_c_minMatch :
-        *value = CCtxParams->cParams.minMatch;
-        break;
-    case ZSTD_c_targetLength :
-        *value = CCtxParams->cParams.targetLength;
-        break;
-    case ZSTD_c_strategy :
-        *value = (unsigned)CCtxParams->cParams.strategy;
-        break;
-    case ZSTD_c_contentSizeFlag :
-        *value = CCtxParams->fParams.contentSizeFlag;
-        break;
-    case ZSTD_c_checksumFlag :
-        *value = CCtxParams->fParams.checksumFlag;
-        break;
-    case ZSTD_c_dictIDFlag :
-        *value = !CCtxParams->fParams.noDictIDFlag;
-        break;
-    case ZSTD_c_forceMaxWindow :
-        *value = CCtxParams->forceWindow;
-        break;
-    case ZSTD_c_forceAttachDict :
-        *value = CCtxParams->attachDictPref;
-        break;
-    case ZSTD_c_literalCompressionMode :
-        *value = CCtxParams->literalCompressionMode;
-        break;
-    case ZSTD_c_nbWorkers :
-#ifndef ZSTD_MULTITHREAD
-        assert(CCtxParams->nbWorkers == 0);
-#endif
-        *value = CCtxParams->nbWorkers;
-        break;
-    case ZSTD_c_jobSize :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
-#else
-        assert(CCtxParams->jobSize <= INT_MAX);
-        *value = (int)CCtxParams->jobSize;
-        break;
-#endif
-    case ZSTD_c_overlapLog :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
-#else
-        *value = CCtxParams->overlapLog;
-        break;
-#endif
-    case ZSTD_c_rsyncable :
-#ifndef ZSTD_MULTITHREAD
-        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
-#else
-        *value = CCtxParams->rsyncable;
-        break;
-#endif
-    case ZSTD_c_enableDedicatedDictSearch :
-        *value = CCtxParams->enableDedicatedDictSearch;
-        break;
-    case ZSTD_c_enableLongDistanceMatching :
-        *value = CCtxParams->ldmParams.enableLdm;
-        break;
-    case ZSTD_c_ldmHashLog :
-        *value = CCtxParams->ldmParams.hashLog;
-        break;
-    case ZSTD_c_ldmMinMatch :
-        *value = CCtxParams->ldmParams.minMatchLength;
-        break;
-    case ZSTD_c_ldmBucketSizeLog :
-        *value = CCtxParams->ldmParams.bucketSizeLog;
-        break;
-    case ZSTD_c_ldmHashRateLog :
-        *value = CCtxParams->ldmParams.hashRateLog;
-        break;
-    case ZSTD_c_targetCBlockSize :
-        *value = (int)CCtxParams->targetCBlockSize;
-        break;
-    case ZSTD_c_srcSizeHint :
-        *value = (int)CCtxParams->srcSizeHint;
-        break;
-    case ZSTD_c_stableInBuffer :
-        *value = (int)CCtxParams->inBufferMode;
-        break;
-    case ZSTD_c_stableOutBuffer :
-        *value = (int)CCtxParams->outBufferMode;
-        break;
-    case ZSTD_c_blockDelimiters :
-        *value = (int)CCtxParams->blockDelimiters;
-        break;
-    case ZSTD_c_validateSequences :
-        *value = (int)CCtxParams->validateSequences;
-        break;
-    case ZSTD_c_useBlockSplitter :
-        *value = (int)CCtxParams->useBlockSplitter;
-        break;
-    case ZSTD_c_useRowMatchFinder :
-        *value = (int)CCtxParams->useRowMatchFinder;
-        break;
-    case ZSTD_c_deterministicRefPrefix:
-        *value = (int)CCtxParams->deterministicRefPrefix;
-        break;
-    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
-    }
-    return 0;
-}
-
-/** ZSTD_CCtx_setParametersUsingCCtxParams() :
- *  just applies `params` into `cctx`
- *  no action is performed, parameters are merely stored.
- *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
- *    This is possible even if a compression is ongoing.
- *    In which case, new parameters will be applied on the fly, starting with next compression job.
- */
-size_t ZSTD_CCtx_setParametersUsingCCtxParams(
-        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
-{
-    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "The context is in the wrong stage!");
-    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
-                    "Can't override parameters with cdict attached (some must "
-                    "be inherited from the cdict).");
-
-    cctx->requestedParams = *params;
-    return 0;
-}
-
-size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
-{
-    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "Can't set pledgedSrcSize when not in init stage.");
-    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
-    return 0;
-}
-
-static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
-        int const compressionLevel,
-        size_t const dictSize);
-static int ZSTD_dedicatedDictSearch_isSupported(
-        const ZSTD_compressionParameters* cParams);
-static void ZSTD_dedicatedDictSearch_revertCParams(
-        ZSTD_compressionParameters* cParams);
-
-/**
- * Initializes the local dict using the requested parameters.
- * NOTE: This does not use the pledged src size, because it may be used for more
- * than one compression.
- */
-static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
-{
-    ZSTD_localDict* const dl = &cctx->localDict;
-    if (dl->dict == NULL) {
-        /* No local dictionary. */
-        assert(dl->dictBuffer == NULL);
-        assert(dl->cdict == NULL);
-        assert(dl->dictSize == 0);
-        return 0;
-    }
-    if (dl->cdict != NULL) {
-        assert(cctx->cdict == dl->cdict);
-        /* Local dictionary already initialized. */
-        return 0;
-    }
-    assert(dl->dictSize > 0);
-    assert(cctx->cdict == NULL);
-    assert(cctx->prefixDict.dict == NULL);
-
-    dl->cdict = ZSTD_createCDict_advanced2(
-            dl->dict,
-            dl->dictSize,
-            ZSTD_dlm_byRef,
-            dl->dictContentType,
-            &cctx->requestedParams,
-            cctx->customMem);
-    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
-    cctx->cdict = dl->cdict;
-    return 0;
-}
-
-size_t ZSTD_CCtx_loadDictionary_advanced(
-        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
-        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
-{
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "Can't load a dictionary when ctx is not in init stage.");
-    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
-    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
-    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
-        return 0;
-    if (dictLoadMethod == ZSTD_dlm_byRef) {
-        cctx->localDict.dict = dict;
-    } else {
-        void* dictBuffer;
-        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
-                        "no malloc for static CCtx");
-        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
-        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
-        ZSTD_memcpy(dictBuffer, dict, dictSize);
-        cctx->localDict.dictBuffer = dictBuffer;
-        cctx->localDict.dict = dictBuffer;
-    }
-    cctx->localDict.dictSize = dictSize;
-    cctx->localDict.dictContentType = dictContentType;
-    return 0;
-}
-
-size_t ZSTD_CCtx_loadDictionary_byReference(
-      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
-{
-    return ZSTD_CCtx_loadDictionary_advanced(
-            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
-}
-
-size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
-{
-    return ZSTD_CCtx_loadDictionary_advanced(
-            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
-}
-
-
-size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
-{
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "Can't ref a dict when ctx not in init stage.");
-    /* Free the existing local cdict (if any) to save memory. */
-    ZSTD_clearAllDicts(cctx);
-    cctx->cdict = cdict;
-    return 0;
-}
-
-size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
-{
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "Can't ref a pool when ctx not in init stage.");
-    cctx->pool = pool;
-    return 0;
-}
-
-size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
-{
-    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
-}
-
-size_t ZSTD_CCtx_refPrefix_advanced(
-        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
-{
-    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                    "Can't ref a prefix when ctx not in init stage.");
-    ZSTD_clearAllDicts(cctx);
-    if (prefix != NULL && prefixSize > 0) {
-        cctx->prefixDict.dict = prefix;
-        cctx->prefixDict.dictSize = prefixSize;
-        cctx->prefixDict.dictContentType = dictContentType;
-    }
-    return 0;
-}
-
-/*! ZSTD_CCtx_reset() :
- *  Also dumps dictionary */
-size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
-{
-    if ( (reset == ZSTD_reset_session_only)
-      || (reset == ZSTD_reset_session_and_parameters) ) {
-        cctx->streamStage = zcss_init;
-        cctx->pledgedSrcSizePlusOne = 0;
-    }
-    if ( (reset == ZSTD_reset_parameters)
-      || (reset == ZSTD_reset_session_and_parameters) ) {
-        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
-                        "Can't reset parameters only when not in init stage.");
-        ZSTD_clearAllDicts(cctx);
-        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
-    }
-    return 0;
-}
-
-
-/** ZSTD_checkCParams() :
-    control CParam values remain within authorized range.
-    @return : 0, or an error code if one value is beyond authorized range */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
-{
-    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
-    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
-    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
-    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
-    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
-    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
-    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
-    return 0;
-}
-
-/** ZSTD_clampCParams() :
- *  make CParam values within valid range.
- *  @return : valid CParams */
-static ZSTD_compressionParameters
-ZSTD_clampCParams(ZSTD_compressionParameters cParams)
-{
-#   define CLAMP_TYPE(cParam, val, type) {                                \
-        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
-        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
-        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
-    }
-#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
-    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
-    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
-    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
-    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
-    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
-    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
-    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
-    return cParams;
-}
-
-/** ZSTD_cycleLog() :
- *  condition for correct operation : hashLog > 1 */
-U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
-{
-    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
-    return hashLog - btScale;
-}
-
-/** ZSTD_dictAndWindowLog() :
- * Returns an adjusted window log that is large enough to fit the source and the dictionary.
- * The zstd format says that the entire dictionary is valid if one byte of the dictionary
- * is within the window. So the hashLog and chainLog should be large enough to reference both
- * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
- * the hashLog and windowLog.
- * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
- */
-static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
-{
-    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
-    /* No dictionary ==> No change */
-    if (dictSize == 0) {
-        return windowLog;
-    }
-    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
-    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
-    {
-        U64 const windowSize = 1ULL << windowLog;
-        U64 const dictAndWindowSize = dictSize + windowSize;
-        /* If the window size is already large enough to fit both the source and the dictionary
-         * then just use the window size. Otherwise adjust so that it fits the dictionary and
-         * the window.
-         */
-        if (windowSize >= dictSize + srcSize) {
-            return windowLog; /* Window size large enough already */
-        } else if (dictAndWindowSize >= maxWindowSize) {
-            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
-        } else  {
-            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
-        }
-    }
-}
-
-/** ZSTD_adjustCParams_internal() :
- *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
- *  mostly downsize to reduce memory consumption and initialization latency.
- * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
- * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
- *  note : `srcSize==0` means 0!
- *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
-static ZSTD_compressionParameters
-ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
-                            unsigned long long srcSize,
-                            size_t dictSize,
-                            ZSTD_cParamMode_e mode)
-{
-    const U64 minSrcSize = 513; /* (1<<9) + 1 */
-    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
-    assert(ZSTD_checkCParams(cPar)==0);
-
-    switch (mode) {
-    case ZSTD_cpm_unknown:
-    case ZSTD_cpm_noAttachDict:
-        /* If we don't know the source size, don't make any
-         * assumptions about it. We will already have selected
-         * smaller parameters if a dictionary is in use.
-         */
-        break;
-    case ZSTD_cpm_createCDict:
-        /* Assume a small source size when creating a dictionary
-         * with an unknown source size.
-         */
-        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
-            srcSize = minSrcSize;
-        break;
-    case ZSTD_cpm_attachDict:
-        /* Dictionary has its own dedicated parameters which have
-         * already been selected. We are selecting parameters
-         * for only the source.
-         */
-        dictSize = 0;
-        break;
-    default:
-        assert(0);
-        break;
-    }
-
-    /* resize windowLog if input is small enough, to use less memory */
-    if ( (srcSize < maxWindowResize)
-      && (dictSize < maxWindowResize) )  {
-        U32 const tSize = (U32)(srcSize + dictSize);
-        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
-        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
-                            ZSTD_highbit32(tSize-1) + 1;
-        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
-    }
-    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
-        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
-        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
-        if (cycleLog > dictAndWindowLog)
-            cPar.chainLog -= (cycleLog - dictAndWindowLog);
-    }
-
-    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
-        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
-
-    return cPar;
-}
-
-ZSTD_compressionParameters
-ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
-                   unsigned long long srcSize,
-                   size_t dictSize)
-{
-    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
-    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
-}
-
-static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
-static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
-
-static void ZSTD_overrideCParams(
-              ZSTD_compressionParameters* cParams,
-        const ZSTD_compressionParameters* overrides)
-{
-    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
-    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
-    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
-    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
-    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
-    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
-    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
-}
-
-ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
-{
-    ZSTD_compressionParameters cParams;
-    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
-      srcSizeHint = CCtxParams->srcSizeHint;
-    }
-    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
-    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
-    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
-    assert(!ZSTD_checkCParams(cParams));
-    /* srcSizeHint == 0 means 0 */
-    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
-}
-
-static size_t
-ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
-                       const ZSTD_paramSwitch_e useRowMatchFinder,
-                       const U32 enableDedicatedDictSearch,
-                       const U32 forCCtx)
-{
-    /* chain table size should be 0 for fast or row-hash strategies */
-    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
-                                ? ((size_t)1 << cParams->chainLog)
-                                : 0;
-    size_t const hSize = ((size_t)1) << cParams->hashLog;
-    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
-    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
-    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
-     * surrounded by redzones in ASAN. */
-    size_t const tableSpace = chainSize * sizeof(U32)
-                            + hSize * sizeof(U32)
-                            + h3Size * sizeof(U32);
-    size_t const optPotentialSpace =
-        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
-    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
-                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
-                                            : 0;
-    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
-                                ? optPotentialSpace
-                                : 0;
-    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
-
-    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
-    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
-    assert(useRowMatchFinder != ZSTD_ps_auto);
-
-    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
-                (U32)chainSize, (U32)hSize, (U32)h3Size);
-    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
-}
-
-static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        const ZSTD_compressionParameters* cParams,
-        const ldmParams_t* ldmParams,
-        const int isStatic,
-        const ZSTD_paramSwitch_e useRowMatchFinder,
-        const size_t buffInSize,
-        const size_t buffOutSize,
-        const U64 pledgedSrcSize)
-{
-    size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
-    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
-    size_t const maxNbSeq = blockSize / divider;
-    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
-                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
-    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
-    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
-    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
-
-    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
-    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
-    size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
-        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
-
-
-    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
-                             + ZSTD_cwksp_alloc_size(buffOutSize);
-
-    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
-
-    size_t const neededSpace =
-        cctxSpace +
-        entropySpace +
-        blockStateSpace +
-        ldmSpace +
-        ldmSeqSpace +
-        matchStateSize +
-        tokenSpace +
-        bufferSpace;
-
-    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
-    return neededSpace;
-}
-
-size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
-{
-    ZSTD_compressionParameters const cParams =
-                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
-                                                                               &cParams);
-
-    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
-    /* estimateCCtxSize is for one-shot compression. So no buffers should
-     * be needed. However, we still allocate two 0-sized buffers, which can
-     * take space under ASAN. */
-    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
-size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
-{
-    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
-    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
-        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
-        size_t noRowCCtxSize;
-        size_t rowCCtxSize;
-        initialParams.useRowMatchFinder = ZSTD_ps_disable;
-        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
-        initialParams.useRowMatchFinder = ZSTD_ps_enable;
-        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
-        return MAX(noRowCCtxSize, rowCCtxSize);
-    } else {
-        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
-    }
-}
-
-static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
-{
-    int tier = 0;
-    size_t largestSize = 0;
-    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
-    for (; tier < 4; ++tier) {
-        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
-        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
-        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
-    }
-    return largestSize;
-}
-
-size_t ZSTD_estimateCCtxSize(int compressionLevel)
-{
-    int level;
-    size_t memBudget = 0;
-    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
-        /* Ensure monotonically increasing memory usage as compression level increases */
-        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
-        if (newMB > memBudget) memBudget = newMB;
-    }
-    return memBudget;
-}
-
-size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
-{
-    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
-    {   ZSTD_compressionParameters const cParams =
-                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
-        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
-                ? ((size_t)1 << cParams.windowLog) + blockSize
-                : 0;
-        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
-                ? ZSTD_compressBound(blockSize) + 1
-                : 0;
-        ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
-
-        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
-            ZSTD_CONTENTSIZE_UNKNOWN);
-    }
-}
-
-size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
-{
-    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
-    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
-        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
-        size_t noRowCCtxSize;
-        size_t rowCCtxSize;
-        initialParams.useRowMatchFinder = ZSTD_ps_disable;
-        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
-        initialParams.useRowMatchFinder = ZSTD_ps_enable;
-        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
-        return MAX(noRowCCtxSize, rowCCtxSize);
-    } else {
-        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
-    }
-}
-
-static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
-{
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-    return ZSTD_estimateCStreamSize_usingCParams(cParams);
-}
-
-size_t ZSTD_estimateCStreamSize(int compressionLevel)
-{
-    int level;
-    size_t memBudget = 0;
-    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
-        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
-        if (newMB > memBudget) memBudget = newMB;
-    }
-    return memBudget;
-}
-
-/* ZSTD_getFrameProgression():
- * tells how much data has been consumed (input) and produced (output) for current frame.
- * able to count progression inside worker threads (non-blocking mode).
- */
-ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
-{
-#ifdef ZSTD_MULTITHREAD
-    if (cctx->appliedParams.nbWorkers > 0) {
-        return ZSTDMT_getFrameProgression(cctx->mtctx);
-    }
-#endif
-    {   ZSTD_frameProgression fp;
-        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
-                                cctx->inBuffPos - cctx->inToCompress;
-        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
-        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
-        fp.ingested = cctx->consumedSrcSize + buffered;
-        fp.consumed = cctx->consumedSrcSize;
-        fp.produced = cctx->producedCSize;
-        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
-        fp.currentJobID = 0;
-        fp.nbActiveWorkers = 0;
-        return fp;
-}   }
-
-/*! ZSTD_toFlushNow()
- *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
- */
-size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
-{
-#ifdef ZSTD_MULTITHREAD
-    if (cctx->appliedParams.nbWorkers > 0) {
-        return ZSTDMT_toFlushNow(cctx->mtctx);
-    }
-#endif
-    (void)cctx;
-    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
-}
-
-static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
-                                    ZSTD_compressionParameters cParams2)
-{
-    (void)cParams1;
-    (void)cParams2;
-    assert(cParams1.windowLog    == cParams2.windowLog);
-    assert(cParams1.chainLog     == cParams2.chainLog);
-    assert(cParams1.hashLog      == cParams2.hashLog);
-    assert(cParams1.searchLog    == cParams2.searchLog);
-    assert(cParams1.minMatch     == cParams2.minMatch);
-    assert(cParams1.targetLength == cParams2.targetLength);
-    assert(cParams1.strategy     == cParams2.strategy);
-}
-
-void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
-{
-    int i;
-    for (i = 0; i < ZSTD_REP_NUM; ++i)
-        bs->rep[i] = repStartValue[i];
-    bs->entropy.huf.repeatMode = HUF_repeat_none;
-    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
-    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
-    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
-}
-
-/*! ZSTD_invalidateMatchState()
- *  Invalidate all the matches in the match finder tables.
- *  Requires nextSrc and base to be set (can be NULL).
- */
-static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
-{
-    ZSTD_window_clear(&ms->window);
-
-    ms->nextToUpdate = ms->window.dictLimit;
-    ms->loadedDictEnd = 0;
-    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
-    ms->dictMatchState = NULL;
-}
-
-/**
- * Controls, for this matchState reset, whether the tables need to be cleared /
- * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
- * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
- * subsequent operation will overwrite the table space anyways (e.g., copying
- * the matchState contents in from a CDict).
- */
-typedef enum {
-    ZSTDcrp_makeClean,
-    ZSTDcrp_leaveDirty
-} ZSTD_compResetPolicy_e;
-
-/**
- * Controls, for this matchState reset, whether indexing can continue where it
- * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
- * (ZSTDirp_reset).
- */
-typedef enum {
-    ZSTDirp_continue,
-    ZSTDirp_reset
-} ZSTD_indexResetPolicy_e;
-
-typedef enum {
-    ZSTD_resetTarget_CDict,
-    ZSTD_resetTarget_CCtx
-} ZSTD_resetTarget_e;
-
-
-static size_t
-ZSTD_reset_matchState(ZSTD_matchState_t* ms,
-                      ZSTD_cwksp* ws,
-                const ZSTD_compressionParameters* cParams,
-                const ZSTD_paramSwitch_e useRowMatchFinder,
-                const ZSTD_compResetPolicy_e crp,
-                const ZSTD_indexResetPolicy_e forceResetIndex,
-                const ZSTD_resetTarget_e forWho)
-{
-    /* disable chain table allocation for fast or row-based strategies */
-    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
-                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
-                                ? ((size_t)1 << cParams->chainLog)
-                                : 0;
-    size_t const hSize = ((size_t)1) << cParams->hashLog;
-    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
-    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
-
-    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
-    assert(useRowMatchFinder != ZSTD_ps_auto);
-    if (forceResetIndex == ZSTDirp_reset) {
-        ZSTD_window_init(&ms->window);
-        ZSTD_cwksp_mark_tables_dirty(ws);
-    }
-
-    ms->hashLog3 = hashLog3;
-
-    ZSTD_invalidateMatchState(ms);
-
-    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
-
-    ZSTD_cwksp_clear_tables(ws);
-
-    DEBUGLOG(5, "reserving table space");
-    /* table Space */
-    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
-    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
-    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
-    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
-                    "failed a workspace allocation in ZSTD_reset_matchState");
-
-    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
-    if (crp!=ZSTDcrp_leaveDirty) {
-        /* reset tables only */
-        ZSTD_cwksp_clean_tables(ws);
-    }
-
-    /* opt parser space */
-    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
-        DEBUGLOG(4, "reserving optimal parser space");
-        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
-        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
-        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
-        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
-        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
-        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
-    }
-
-    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
-        {   /* Row match finder needs an additional table of hashes ("tags") */
-            size_t const tagTableSize = hSize*sizeof(U16);
-            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
-            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
-        }
-        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
-            U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
-            assert(cParams->hashLog >= rowLog);
-            ms->rowHashLog = cParams->hashLog - rowLog;
-        }
-    }
-
-    ms->cParams = *cParams;
-
-    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
-                    "failed a workspace allocation in ZSTD_reset_matchState");
-    return 0;
-}
-
-/* ZSTD_indexTooCloseToMax() :
- * minor optimization : prefer memset() rather than reduceIndex()
- * which is measurably slow in some circumstances (reported for Visual Studio).
- * Works when re-using a context for a lot of smallish inputs :
- * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
- * memset() will be triggered before reduceIndex().
- */
-#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
-static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
-{
-    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
-}
-
-/** ZSTD_dictTooBig():
- * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
- * one go generically. So we ensure that in that case we reset the tables to zero,
- * so that we can load as much of the dictionary as possible.
- */
-static int ZSTD_dictTooBig(size_t const loadedDictSize)
-{
-    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
-}
-
-/*! ZSTD_resetCCtx_internal() :
- * @param loadedDictSize The size of the dictionary to be loaded
- * into the context, if any. If no dictionary is used, or the
- * dictionary is being attached / copied, then pass 0.
- * note : `params` are assumed fully validated at this stage.
- */
-static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params const* params,
-                                      U64 const pledgedSrcSize,
-                                      size_t const loadedDictSize,
-                                      ZSTD_compResetPolicy_e const crp,
-                                      ZSTD_buffered_policy_e const zbuff)
-{
-    ZSTD_cwksp* const ws = &zc->workspace;
-    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
-                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
-    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
-
-    zc->isFirstBlock = 1;
-
-    /* Set applied params early so we can modify them for LDM,
-     * and point params at the applied params.
-     */
-    zc->appliedParams = *params;
-    params = &zc->appliedParams;
-
-    assert(params->useRowMatchFinder != ZSTD_ps_auto);
-    assert(params->useBlockSplitter != ZSTD_ps_auto);
-    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
-    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-        /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
-        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
-        assert(params->ldmParams.hashRateLog < 32);
-    }
-
-    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
-        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
-        size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
-                ? ZSTD_compressBound(blockSize) + 1
-                : 0;
-        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
-                ? windowSize + blockSize
-                : 0;
-        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
-
-        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
-        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
-        ZSTD_indexResetPolicy_e needsIndexReset =
-            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
-
-        size_t const neededSpace =
-            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
-                buffInSize, buffOutSize, pledgedSrcSize);
-        int resizeWorkspace;
-
-        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
-
-        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
-
-        {   /* Check if workspace is large enough, alloc a new one if needed */
-            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
-            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
-            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
-            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
-
-            if (resizeWorkspace) {
-                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
-                            ZSTD_cwksp_sizeof(ws) >> 10,
-                            neededSpace >> 10);
-
-                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
-
-                needsIndexReset = ZSTDirp_reset;
-
-                ZSTD_cwksp_free(ws, zc->customMem);
-                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
-
-                DEBUGLOG(5, "reserving object space");
-                /* Statically sized space.
-                 * entropyWorkspace never moves,
-                 * though prev/next block swap places */
-                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
-                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
-                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
-                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
-                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
-                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
-                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
-        }   }
-
-        ZSTD_cwksp_clear(ws);
-
-        /* init params */
-        zc->blockState.matchState.cParams = params->cParams;
-        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
-        zc->consumedSrcSize = 0;
-        zc->producedCSize = 0;
-        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
-            zc->appliedParams.fParams.contentSizeFlag = 0;
-        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
-            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
-        zc->blockSize = blockSize;
-
-        XXH64_reset(&zc->xxhState, 0);
-        zc->stage = ZSTDcs_init;
-        zc->dictID = 0;
-        zc->dictContentSize = 0;
-
-        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
-
-        /* ZSTD_wildcopy() is used to copy into the literals buffer,
-         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
-         */
-        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
-        zc->seqStore.maxNbLit = blockSize;
-
-        /* buffers */
-        zc->bufferedPolicy = zbuff;
-        zc->inBuffSize = buffInSize;
-        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
-        zc->outBuffSize = buffOutSize;
-        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
-
-        /* ldm bucketOffsets table */
-        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-            /* TODO: avoid memset? */
-            size_t const numBuckets =
-                  ((size_t)1) << (params->ldmParams.hashLog -
-                                  params->ldmParams.bucketSizeLog);
-            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
-            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
-        }
-
-        /* sequences storage */
-        ZSTD_referenceExternalSequences(zc, NULL, 0);
-        zc->seqStore.maxNbSeq = maxNbSeq;
-        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
-        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
-        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
-        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
-
-        FORWARD_IF_ERROR(ZSTD_reset_matchState(
-            &zc->blockState.matchState,
-            ws,
-            &params->cParams,
-            params->useRowMatchFinder,
-            crp,
-            needsIndexReset,
-            ZSTD_resetTarget_CCtx), "");
-
-        /* ldm hash table */
-        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-            /* TODO: avoid memset? */
-            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
-            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
-            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
-            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
-            zc->maxNbLdmSequences = maxNbLdmSeq;
-
-            ZSTD_window_init(&zc->ldmState.window);
-            zc->ldmState.loadedDictEnd = 0;
-        }
-
-        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
-        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
-
-        zc->initialized = 1;
-
-        return 0;
-    }
-}
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
-    int i;
-    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
-    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
-}
-
-/* These are the approximate sizes for each strategy past which copying the
- * dictionary tables into the working context is faster than using them
- * in-place.
- */
-static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
-    8 KB,  /* unused */
-    8 KB,  /* ZSTD_fast */
-    16 KB, /* ZSTD_dfast */
-    32 KB, /* ZSTD_greedy */
-    32 KB, /* ZSTD_lazy */
-    32 KB, /* ZSTD_lazy2 */
-    32 KB, /* ZSTD_btlazy2 */
-    32 KB, /* ZSTD_btopt */
-    8 KB,  /* ZSTD_btultra */
-    8 KB   /* ZSTD_btultra2 */
-};
-
-static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
-                                 const ZSTD_CCtx_params* params,
-                                 U64 pledgedSrcSize)
-{
-    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
-    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
-    return dedicatedDictSearch
-        || ( ( pledgedSrcSize <= cutoff
-            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
-            || params->attachDictPref == ZSTD_dictForceAttach )
-          && params->attachDictPref != ZSTD_dictForceCopy
-          && !params->forceWindow ); /* dictMatchState isn't correctly
-                                      * handled in _enforceMaxDist */
-}
-
-static size_t
-ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
-                        const ZSTD_CDict* cdict,
-                        ZSTD_CCtx_params params,
-                        U64 pledgedSrcSize,
-                        ZSTD_buffered_policy_e zbuff)
-{
-    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
-                (unsigned long long)pledgedSrcSize);
-    {
-        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
-        unsigned const windowLog = params.cParams.windowLog;
-        assert(windowLog != 0);
-        /* Resize working context table params for input only, since the dict
-         * has its own tables. */
-        /* pledgedSrcSize == 0 means 0! */
-
-        if (cdict->matchState.dedicatedDictSearch) {
-            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
-        }
-
-        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
-                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
-        params.cParams.windowLog = windowLog;
-        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
-                                                 /* loadedDictSize */ 0,
-                                                 ZSTDcrp_makeClean, zbuff), "");
-        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
-    }
-
-    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
-                                  - cdict->matchState.window.base);
-        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
-        if (cdictLen == 0) {
-            /* don't even attach dictionaries with no contents */
-            DEBUGLOG(4, "skipping attaching empty dictionary");
-        } else {
-            DEBUGLOG(4, "attaching dictionary into context");
-            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
-
-            /* prep working match state so dict matches never have negative indices
-             * when they are translated to the working context's index space. */
-            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
-                cctx->blockState.matchState.window.nextSrc =
-                    cctx->blockState.matchState.window.base + cdictEnd;
-                ZSTD_window_clear(&cctx->blockState.matchState.window);
-            }
-            /* loadedDictEnd is expressed within the referential of the active context */
-            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
-    }   }
-
-    cctx->dictID = cdict->dictID;
-    cctx->dictContentSize = cdict->dictContentSize;
-
-    /* copy block state */
-    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
-
-    return 0;
-}
-
-static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
-                            const ZSTD_CDict* cdict,
-                            ZSTD_CCtx_params params,
-                            U64 pledgedSrcSize,
-                            ZSTD_buffered_policy_e zbuff)
-{
-    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
-
-    assert(!cdict->matchState.dedicatedDictSearch);
-    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
-                (unsigned long long)pledgedSrcSize);
-
-    {   unsigned const windowLog = params.cParams.windowLog;
-        assert(windowLog != 0);
-        /* Copy only compression parameters related to tables. */
-        params.cParams = *cdict_cParams;
-        params.cParams.windowLog = windowLog;
-        params.useRowMatchFinder = cdict->useRowMatchFinder;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
-                                                 /* loadedDictSize */ 0,
-                                                 ZSTDcrp_leaveDirty, zbuff), "");
-        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
-        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
-        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
-    }
-
-    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
-    assert(params.useRowMatchFinder != ZSTD_ps_auto);
-
-    /* copy tables */
-    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
-                                                            ? ((size_t)1 << cdict_cParams->chainLog)
-                                                            : 0;
-        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
-
-        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
-               cdict->matchState.hashTable,
-               hSize * sizeof(U32));
-        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
-        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
-            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
-               cdict->matchState.chainTable,
-               chainSize * sizeof(U32));
-        }
-        /* copy tag table */
-        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
-            size_t const tagTableSize = hSize*sizeof(U16);
-            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
-                cdict->matchState.tagTable,
-                tagTableSize);
-        }
-    }
-
-    /* Zero the hashTable3, since the cdict never fills it */
-    {   int const h3log = cctx->blockState.matchState.hashLog3;
-        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
-        assert(cdict->matchState.hashLog3 == 0);
-        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
-    }
-
-    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
-
-    /* copy dictionary offsets */
-    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
-        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
-        dstMatchState->window       = srcMatchState->window;
-        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
-    }
-
-    cctx->dictID = cdict->dictID;
-    cctx->dictContentSize = cdict->dictContentSize;
-
-    /* copy block state */
-    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
-
-    return 0;
-}
-
-/* We have a choice between copying the dictionary context into the working
- * context, or referencing the dictionary context from the working context
- * in-place. We decide here which strategy to use. */
-static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
-                            const ZSTD_CDict* cdict,
-                            const ZSTD_CCtx_params* params,
-                            U64 pledgedSrcSize,
-                            ZSTD_buffered_policy_e zbuff)
-{
-
-    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
-                (unsigned)pledgedSrcSize);
-
-    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
-        return ZSTD_resetCCtx_byAttachingCDict(
-            cctx, cdict, *params, pledgedSrcSize, zbuff);
-    } else {
-        return ZSTD_resetCCtx_byCopyingCDict(
-            cctx, cdict, *params, pledgedSrcSize, zbuff);
-    }
-}
-
-/*! ZSTD_copyCCtx_internal() :
- *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
- *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
- *  The "context", in this case, refers to the hash and chain tables,
- *  entropy tables, and dictionary references.
- * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
- * @return : 0, or an error code */
-static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
-                            const ZSTD_CCtx* srcCCtx,
-                            ZSTD_frameParameters fParams,
-                            U64 pledgedSrcSize,
-                            ZSTD_buffered_policy_e zbuff)
-{
-    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
-                    "Can't copy a ctx that's not in init stage.");
-    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
-    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
-        /* Copy only compression parameters related to tables. */
-        params.cParams = srcCCtx->appliedParams.cParams;
-        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
-        assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
-        assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
-        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
-        params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
-        params.ldmParams = srcCCtx->appliedParams.ldmParams;
-        params.fParams = fParams;
-        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
-                                /* loadedDictSize */ 0,
-                                ZSTDcrp_leaveDirty, zbuff);
-        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
-        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
-        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
-        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
-        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
-    }
-
-    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
-
-    /* copy tables */
-    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
-                                                         srcCCtx->appliedParams.useRowMatchFinder,
-                                                         0 /* forDDSDict */)
-                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
-                                    : 0;
-        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
-        int const h3log = srcCCtx->blockState.matchState.hashLog3;
-        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
-
-        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
-               srcCCtx->blockState.matchState.hashTable,
-               hSize * sizeof(U32));
-        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
-               srcCCtx->blockState.matchState.chainTable,
-               chainSize * sizeof(U32));
-        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
-               srcCCtx->blockState.matchState.hashTable3,
-               h3Size * sizeof(U32));
-    }
-
-    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
-
-    /* copy dictionary offsets */
-    {
-        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
-        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
-        dstMatchState->window       = srcMatchState->window;
-        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
-    }
-    dstCCtx->dictID = srcCCtx->dictID;
-    dstCCtx->dictContentSize = srcCCtx->dictContentSize;
-
-    /* copy block state */
-    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
-
-    return 0;
-}
-
-/*! ZSTD_copyCCtx() :
- *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
- *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
- *  pledgedSrcSize==0 means "unknown".
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
-{
-    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
-    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
-    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
-    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
-
-    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
-                                fParams, pledgedSrcSize,
-                                zbuff);
-}
-
-
-#define ZSTD_ROWSIZE 16
-/*! ZSTD_reduceTable() :
- *  reduce table indexes by `reducerValue`, or squash to zero.
- *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
- *  It must be set to a clear 0/1 value, to remove branch during inlining.
- *  Presume table size is a multiple of ZSTD_ROWSIZE
- *  to help auto-vectorization */
-FORCE_INLINE_TEMPLATE void
-ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
-{
-    int const nbRows = (int)size / ZSTD_ROWSIZE;
-    int cellNb = 0;
-    int rowNb;
-    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
-    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
-    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
-    assert(size < (1U<<31));   /* can be casted to int */
-
-#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
-     * access table space that we haven't cleaned, we re-"poison" the table
-     * space every time we mark it dirty.
-     *
-     * This function however is intended to operate on those dirty tables and
-     * re-clean them. So when this function is used correctly, we can unpoison
-     * the memory it operated on. This introduces a blind spot though, since
-     * if we now try to operate on __actually__ poisoned memory, we will not
-     * detect that. */
-    __msan_unpoison(table, size * sizeof(U32));
-#endif
-
-    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
-        int column;
-        for (column=0; column<ZSTD_ROWSIZE; column++) {
-            U32 newVal;
-            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
-                /* This write is pointless, but is required(?) for the compiler
-                 * to auto-vectorize the loop. */
-                newVal = ZSTD_DUBT_UNSORTED_MARK;
-            } else if (table[cellNb] < reducerThreshold) {
-                newVal = 0;
-            } else {
-                newVal = table[cellNb] - reducerValue;
-            }
-            table[cellNb] = newVal;
-            cellNb++;
-    }   }
-}
-
-static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
-{
-    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
-}
-
-static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
-{
-    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
-}
-
-/*! ZSTD_reduceIndex() :
-*   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
-{
-    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
-        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
-    }
-
-    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
-        U32 const chainSize = (U32)1 << params->cParams.chainLog;
-        if (params->cParams.strategy == ZSTD_btlazy2)
-            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
-        else
-            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
-    }
-
-    if (ms->hashLog3) {
-        U32 const h3Size = (U32)1 << ms->hashLog3;
-        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
-    }
-}
-
-
-/*-*******************************************************
-*  Block entropic compression
-*********************************************************/
-
-/* See doc/zstd_compression_format.md for detailed format description */
-
-void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
-{
-    const seqDef* const sequences = seqStorePtr->sequencesStart;
-    BYTE* const llCodeTable = seqStorePtr->llCode;
-    BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-    U32 u;
-    assert(nbSeq <= seqStorePtr->maxNbSeq);
-    for (u=0; u<nbSeq; u++) {
-        U32 const llv = sequences[u].litLength;
-        U32 const mlv = sequences[u].mlBase;
-        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
-        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
-        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
-    }
-    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
-        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
-        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
-}
-
-/* ZSTD_useTargetCBlockSize():
- * Returns if target compressed block size param is being used.
- * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
- * Returns 1 if true, 0 otherwise. */
-static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
-{
-    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
-    return (cctxParams->targetCBlockSize != 0);
-}
-
-/* ZSTD_blockSplitterEnabled():
- * Returns if block splitting param is being used
- * If used, compression will do best effort to split a block in order to improve compression ratio.
- * At the time this function is called, the parameter must be finalized.
- * Returns 1 if true, 0 otherwise. */
-static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
-{
-    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
-    assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
-    return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
-}
-
-/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
- * and size of the sequences statistics
- */
-typedef struct {
-    U32 LLtype;
-    U32 Offtype;
-    U32 MLtype;
-    size_t size;
-    size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
-} ZSTD_symbolEncodingTypeStats_t;
-
-/* ZSTD_buildSequencesStatistics():
- * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
- * Modifies `nextEntropy` to have the appropriate values as a side effect.
- * nbSeq must be greater than 0.
- *
- * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
- */
-static ZSTD_symbolEncodingTypeStats_t
-ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
-                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
-                              BYTE* dst, const BYTE* const dstEnd,
-                              ZSTD_strategy strategy, unsigned* countWorkspace,
-                              void* entropyWorkspace, size_t entropyWkspSize) {
-    BYTE* const ostart = dst;
-    const BYTE* const oend = dstEnd;
-    BYTE* op = ostart;
-    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    ZSTD_symbolEncodingTypeStats_t stats;
-
-    stats.lastCountSize = 0;
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    assert(op <= oend);
-    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
-    /* build CTable for Literal Lengths */
-    {   unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
-        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
-                                        countWorkspace, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
-                countWorkspace, max, llCodeTable, nbSeq,
-                LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                prevEntropy->litlengthCTable,
-                sizeof(prevEntropy->litlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            if (ZSTD_isError(countSize)) {
-                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
-                stats.size = countSize;
-                return stats;
-            }
-            if (stats.LLtype == set_compressed)
-                stats.lastCountSize = countSize;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for Offsets */
-    {   unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
-        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
-                                        countWorkspace, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
-                countWorkspace, max, ofCodeTable, nbSeq,
-                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                prevEntropy->offcodeCTable,
-                sizeof(prevEntropy->offcodeCTable),
-                entropyWorkspace, entropyWkspSize);
-            if (ZSTD_isError(countSize)) {
-                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
-                stats.size = countSize;
-                return stats;
-            }
-            if (stats.Offtype == set_compressed)
-                stats.lastCountSize = countSize;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for MatchLengths */
-    {   unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
-        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
-                                        countWorkspace, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
-                countWorkspace, max, mlCodeTable, nbSeq,
-                ML_defaultNorm, ML_defaultNormLog, MaxML,
-                prevEntropy->matchlengthCTable,
-                sizeof(prevEntropy->matchlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            if (ZSTD_isError(countSize)) {
-                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
-                stats.size = countSize;
-                return stats;
-            }
-            if (stats.MLtype == set_compressed)
-                stats.lastCountSize = countSize;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    stats.size = (size_t)(op-ostart);
-    return stats;
-}
-
-/* ZSTD_entropyCompressSeqStore_internal():
- * compresses both literals and sequences
- * Returns compressed size of block, or a zstd error.
- */
-#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
-MEM_STATIC size_t
-ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
-                          const ZSTD_entropyCTables_t* prevEntropy,
-                                ZSTD_entropyCTables_t* nextEntropy,
-                          const ZSTD_CCtx_params* cctxParams,
-                                void* dst, size_t dstCapacity,
-                                void* entropyWorkspace, size_t entropyWkspSize,
-                          const int bmi2)
-{
-    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    unsigned* count = (unsigned*)entropyWorkspace;
-    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
-    const seqDef* const sequences = seqStorePtr->sequencesStart;
-    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstCapacity;
-    BYTE* op = ostart;
-    size_t lastCountSize;
-
-    entropyWorkspace = count + (MaxSeq + 1);
-    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
-
-    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
-    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
-    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
-
-    /* Compress literals */
-    {   const BYTE* const literals = seqStorePtr->litStart;
-        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
-        /* Base suspicion of uncompressibility on ratio of literals to sequences */
-        unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
-        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
-        size_t const cSize = ZSTD_compressLiterals(
-                                    &prevEntropy->huf, &nextEntropy->huf,
-                                    cctxParams->cParams.strategy,
-                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
-                                    op, dstCapacity,
-                                    literals, litSize,
-                                    entropyWorkspace, entropyWkspSize,
-                                    bmi2, suspectUncompressible);
-        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
-        assert(cSize <= dstCapacity);
-        op += cSize;
-    }
-
-    /* Sequences Header */
-    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
-                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
-    if (nbSeq < 128) {
-        *op++ = (BYTE)nbSeq;
-    } else if (nbSeq < LONGNBSEQ) {
-        op[0] = (BYTE)((nbSeq>>8) + 0x80);
-        op[1] = (BYTE)nbSeq;
-        op+=2;
-    } else {
-        op[0]=0xFF;
-        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
-        op+=3;
-    }
-    assert(op <= oend);
-    if (nbSeq==0) {
-        /* Copy the old tables over as if we repeated them */
-        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
-        return (size_t)(op - ostart);
-    }
-    {
-        ZSTD_symbolEncodingTypeStats_t stats;
-        BYTE* seqHead = op++;
-        /* build stats for sequences */
-        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
-                                             &prevEntropy->fse, &nextEntropy->fse,
-                                              op, oend,
-                                              strategy, count,
-                                              entropyWorkspace, entropyWkspSize);
-        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
-        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
-        lastCountSize = stats.lastCountSize;
-        op += stats.size;
-    }
-
-    {   size_t const bitstreamSize = ZSTD_encodeSequences(
-                                        op, (size_t)(oend - op),
-                                        CTable_MatchLength, mlCodeTable,
-                                        CTable_OffsetBits, ofCodeTable,
-                                        CTable_LitLength, llCodeTable,
-                                        sequences, nbSeq,
-                                        longOffsets, bmi2);
-        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
-        op += bitstreamSize;
-        assert(op <= oend);
-        /* zstd versions <= 1.3.4 mistakenly report corruption when
-         * FSE_readNCount() receives a buffer < 4 bytes.
-         * Fixed by https://github.com/facebook/zstd/pull/1146.
-         * This can happen when the last set_compressed table present is 2
-         * bytes and the bitstream is only one byte.
-         * In this exceedingly rare case, we will simply emit an uncompressed
-         * block, since it isn't worth optimizing.
-         */
-        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
-            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
-            assert(lastCountSize + bitstreamSize == 3);
-            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
-                        "emitting an uncompressed block.");
-            return 0;
-        }
-    }
-
-    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
-    return (size_t)(op - ostart);
-}
-
-MEM_STATIC size_t
-ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
-                       const ZSTD_entropyCTables_t* prevEntropy,
-                             ZSTD_entropyCTables_t* nextEntropy,
-                       const ZSTD_CCtx_params* cctxParams,
-                             void* dst, size_t dstCapacity,
-                             size_t srcSize,
-                             void* entropyWorkspace, size_t entropyWkspSize,
-                             int bmi2)
-{
-    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
-                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
-                            dst, dstCapacity,
-                            entropyWorkspace, entropyWkspSize, bmi2);
-    if (cSize == 0) return 0;
-    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
-     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
-     */
-    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
-        return 0;  /* block not compressed */
-    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
-
-    /* Check compressibility */
-    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
-        if (cSize >= maxCSize) return 0;  /* block not compressed */
-    }
-    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
-    return cSize;
-}
-
-/* ZSTD_selectBlockCompressor() :
- * Not static, but internal use only (used by long distance matcher)
- * assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
-{
-    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
-        { ZSTD_compressBlock_fast  /* default for 0 */,
-          ZSTD_compressBlock_fast,
-          ZSTD_compressBlock_doubleFast,
-          ZSTD_compressBlock_greedy,
-          ZSTD_compressBlock_lazy,
-          ZSTD_compressBlock_lazy2,
-          ZSTD_compressBlock_btlazy2,
-          ZSTD_compressBlock_btopt,
-          ZSTD_compressBlock_btultra,
-          ZSTD_compressBlock_btultra2 },
-        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
-          ZSTD_compressBlock_fast_extDict,
-          ZSTD_compressBlock_doubleFast_extDict,
-          ZSTD_compressBlock_greedy_extDict,
-          ZSTD_compressBlock_lazy_extDict,
-          ZSTD_compressBlock_lazy2_extDict,
-          ZSTD_compressBlock_btlazy2_extDict,
-          ZSTD_compressBlock_btopt_extDict,
-          ZSTD_compressBlock_btultra_extDict,
-          ZSTD_compressBlock_btultra_extDict },
-        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
-          ZSTD_compressBlock_fast_dictMatchState,
-          ZSTD_compressBlock_doubleFast_dictMatchState,
-          ZSTD_compressBlock_greedy_dictMatchState,
-          ZSTD_compressBlock_lazy_dictMatchState,
-          ZSTD_compressBlock_lazy2_dictMatchState,
-          ZSTD_compressBlock_btlazy2_dictMatchState,
-          ZSTD_compressBlock_btopt_dictMatchState,
-          ZSTD_compressBlock_btultra_dictMatchState,
-          ZSTD_compressBlock_btultra_dictMatchState },
-        { NULL  /* default for 0 */,
-          NULL,
-          NULL,
-          ZSTD_compressBlock_greedy_dedicatedDictSearch,
-          ZSTD_compressBlock_lazy_dedicatedDictSearch,
-          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
-          NULL,
-          NULL,
-          NULL,
-          NULL }
-    };
-    ZSTD_blockCompressor selectedCompressor;
-    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
-
-    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
-    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
-        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
-            { ZSTD_compressBlock_greedy_row,
-            ZSTD_compressBlock_lazy_row,
-            ZSTD_compressBlock_lazy2_row },
-            { ZSTD_compressBlock_greedy_extDict_row,
-            ZSTD_compressBlock_lazy_extDict_row,
-            ZSTD_compressBlock_lazy2_extDict_row },
-            { ZSTD_compressBlock_greedy_dictMatchState_row,
-            ZSTD_compressBlock_lazy_dictMatchState_row,
-            ZSTD_compressBlock_lazy2_dictMatchState_row },
-            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
-            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
-            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
-        };
-        DEBUGLOG(4, "Selecting a row-based matchfinder");
-        assert(useRowMatchFinder != ZSTD_ps_auto);
-        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
-    } else {
-        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
-    }
-    assert(selectedCompressor != NULL);
-    return selectedCompressor;
-}
-
-static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
-                                   const BYTE* anchor, size_t lastLLSize)
-{
-    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
-    seqStorePtr->lit += lastLLSize;
-}
-
-void ZSTD_resetSeqStore(seqStore_t* ssPtr)
-{
-    ssPtr->lit = ssPtr->litStart;
-    ssPtr->sequences = ssPtr->sequencesStart;
-    ssPtr->longLengthType = ZSTD_llt_none;
-}
-
-typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
-
-static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
-{
-    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
-    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-    /* Assert that we have correctly flushed the ctx params into the ms's copy */
-    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
-    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
-        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
-            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
-        } else {
-            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
-        }
-        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
-    }
-    ZSTD_resetSeqStore(&(zc->seqStore));
-    /* required for optimal parser to read stats from dictionary */
-    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
-    /* tell the optimal parser how we expect to compress literals */
-    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
-    /* a gap between an attached dict and the current window is not safe,
-     * they must remain adjacent,
-     * and when that stops being the case, the dict must be unset */
-    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
-
-    /* limited update after a very long match */
-    {   const BYTE* const base = ms->window.base;
-        const BYTE* const istart = (const BYTE*)src;
-        const U32 curr = (U32)(istart-base);
-        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
-        if (curr > ms->nextToUpdate + 384)
-            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
-    }
-
-    /* select and store sequences */
-    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
-        size_t lastLLSize;
-        {   int i;
-            for (i = 0; i < ZSTD_REP_NUM; ++i)
-                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
-        }
-        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
-            assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
-            /* Updates ldmSeqStore.pos */
-            lastLLSize =
-                ZSTD_ldm_blockCompress(&zc->externSeqStore,
-                                       ms, &zc->seqStore,
-                                       zc->blockState.nextCBlock->rep,
-                                       zc->appliedParams.useRowMatchFinder,
-                                       src, srcSize);
-            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
-        } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
-            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
-
-            ldmSeqStore.seq = zc->ldmSequences;
-            ldmSeqStore.capacity = zc->maxNbLdmSequences;
-            /* Updates ldmSeqStore.size */
-            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
-                                               &zc->appliedParams.ldmParams,
-                                               src, srcSize), "");
-            /* Updates ldmSeqStore.pos */
-            lastLLSize =
-                ZSTD_ldm_blockCompress(&ldmSeqStore,
-                                       ms, &zc->seqStore,
-                                       zc->blockState.nextCBlock->rep,
-                                       zc->appliedParams.useRowMatchFinder,
-                                       src, srcSize);
-            assert(ldmSeqStore.pos == ldmSeqStore.size);
-        } else {   /* not long range mode */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
-                                                                                    zc->appliedParams.useRowMatchFinder,
-                                                                                    dictMode);
-            ms->ldmSeqStore = NULL;
-            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
-        }
-        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
-            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
-    }   }
-    return ZSTDbss_compress;
-}
-
-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
-{
-    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
-    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
-    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
-    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
-    size_t literalsRead = 0;
-    size_t lastLLSize;
-
-    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
-    size_t i;
-    repcodes_t updatedRepcodes;
-
-    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
-    /* Ensure we have enough space for last literals "sequence" */
-    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
-    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    for (i = 0; i < seqStoreSeqSize; ++i) {
-        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
-        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
-        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
-        outSeqs[i].rep = 0;
-
-        if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
-                outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
-                outSeqs[i].matchLength += 0x10000;
-            }
-        }
-
-        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
-            /* Derive the correct offset corresponding to a repcode */
-            outSeqs[i].rep = seqStoreSeqs[i].offBase;
-            if (outSeqs[i].litLength != 0) {
-                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
-            } else {
-                if (outSeqs[i].rep == 3) {
-                    rawOffset = updatedRepcodes.rep[0] - 1;
-                } else {
-                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
-                }
-            }
-        }
-        outSeqs[i].offset = rawOffset;
-        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
-           so we provide seqStoreSeqs[i].offset - 1 */
-        ZSTD_updateRep(updatedRepcodes.rep,
-                       seqStoreSeqs[i].offBase - 1,
-                       seqStoreSeqs[i].litLength == 0);
-        literalsRead += outSeqs[i].litLength;
-    }
-    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
-     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
-     * for the block boundary, according to the API.
-     */
-    assert(seqStoreLiteralsSize >= literalsRead);
-    lastLLSize = seqStoreLiteralsSize - literalsRead;
-    outSeqs[i].litLength = (U32)lastLLSize;
-    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
-    seqStoreSeqSize++;
-    zc->seqCollector.seqIndex += seqStoreSeqSize;
-}
-
-size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-                              size_t outSeqsSize, const void* src, size_t srcSize)
-{
-    const size_t dstCapacity = ZSTD_compressBound(srcSize);
-    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
-    SeqCollector seqCollector;
-
-    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
-
-    seqCollector.collectSequences = 1;
-    seqCollector.seqStart = outSeqs;
-    seqCollector.seqIndex = 0;
-    seqCollector.maxSequences = outSeqsSize;
-    zc->seqCollector = seqCollector;
-
-    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
-    ZSTD_customFree(dst, ZSTD_defaultCMem);
-    return zc->seqCollector.seqIndex;
-}
-
-size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
-    size_t in = 0;
-    size_t out = 0;
-    for (; in < seqsSize; ++in) {
-        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
-            if (in != seqsSize - 1) {
-                sequences[in+1].litLength += sequences[in].litLength;
-            }
-        } else {
-            sequences[out] = sequences[in];
-            ++out;
-        }
-    }
-    return out;
-}
-
-/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
-static int ZSTD_isRLE(const BYTE* src, size_t length) {
-    const BYTE* ip = src;
-    const BYTE value = ip[0];
-    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
-    const size_t unrollSize = sizeof(size_t) * 4;
-    const size_t unrollMask = unrollSize - 1;
-    const size_t prefixLength = length & unrollMask;
-    size_t i;
-    size_t u;
-    if (length == 1) return 1;
-    /* Check if prefix is RLE first before using unrolled loop */
-    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
-        return 0;
-    }
-    for (i = prefixLength; i != length; i += unrollSize) {
-        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
-            if (MEM_readST(ip + i + u) != valueST) {
-                return 0;
-            }
-        }
-    }
-    return 1;
-}
-
-/* Returns true if the given block may be RLE.
- * This is just a heuristic based on the compressibility.
- * It may return both false positives and false negatives.
- */
-static int ZSTD_maybeRLE(seqStore_t const* seqStore)
-{
-    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
-    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
-
-    return nbSeqs < 4 && nbLits < 10;
-}
-
-static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
-{
-    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
-    bs->prevCBlock = bs->nextCBlock;
-    bs->nextCBlock = tmp;
-}
-
-/* Writes the block header */
-static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
-    U32 const cBlockHeader = cSize == 1 ?
-                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
-                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
-    MEM_writeLE24(op, cBlockHeader);
-    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
-}
-
-/** ZSTD_buildBlockEntropyStats_literals() :
- *  Builds entropy for the literals.
- *  Stores literals block type (raw, rle, compressed, repeat) and
- *  huffman description table to hufMetadata.
- *  Requires ENTROPY_WORKSPACE_SIZE workspace
- *  @return : size of huffman description table or error code */
-static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
-                                            const ZSTD_hufCTables_t* prevHuf,
-                                                  ZSTD_hufCTables_t* nextHuf,
-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                  const int literalsCompressionIsDisabled,
-                                                  void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-    BYTE* const nodeWksp = countWkspStart + countWkspSize;
-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-    HUF_repeat repeat = prevHuf->repeatMode;
-    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (literalsCompressionIsDisabled) {
-        DEBUGLOG(5, "set_basic - disabled");
-        hufMetadata->hType = set_basic;
-        return 0;
-    }
-
-    /* small ? don't even attempt compression (speed opt) */
-#ifndef COMPRESS_LITERALS_SIZE_MIN
-#define COMPRESS_LITERALS_SIZE_MIN 63
-#endif
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) {
-            DEBUGLOG(5, "set_basic - too small");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Scan input and build symbol stats */
-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-        if (largest == srcSize) {
-            DEBUGLOG(5, "set_rle");
-            hufMetadata->hType = set_rle;
-            return 0;
-        }
-        if (largest <= (srcSize >> 7)+4) {
-            DEBUGLOG(5, "set_basic - no gain");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Validate the previous Huffman table */
-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-        repeat = HUF_repeat_none;
-    }
-
-    /* Build Huffman Tree */
-    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                    maxSymbolValue, huffLog,
-                                                    nodeWksp, nodeWkspSize);
-        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-        huffLog = (U32)maxBits;
-        {   /* Build and write the CTable */
-            size_t const newCSize = HUF_estimateCompressedSize(
-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t const hSize = HUF_writeCTable_wksp(
-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
-                    nodeWksp, nodeWkspSize);
-            /* Check against repeating the previous CTable */
-            if (repeat != HUF_repeat_none) {
-                size_t const oldCSize = HUF_estimateCompressedSize(
-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-                    DEBUGLOG(5, "set_repeat - smaller");
-                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                    hufMetadata->hType = set_repeat;
-                    return 0;
-                }
-            }
-            if (newCSize + hSize >= srcSize) {
-                DEBUGLOG(5, "set_basic - no gains");
-                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                hufMetadata->hType = set_basic;
-                return 0;
-            }
-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-            hufMetadata->hType = set_compressed;
-            nextHuf->repeatMode = HUF_repeat_check;
-            return hSize;
-        }
-    }
-}
-
-
-/* ZSTD_buildDummySequencesStatistics():
- * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
- * and updates nextEntropy to the appropriate repeatMode.
- */
-static ZSTD_symbolEncodingTypeStats_t
-ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
-    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
-    nextEntropy->litlength_repeatMode = FSE_repeat_none;
-    nextEntropy->offcode_repeatMode = FSE_repeat_none;
-    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
-    return stats;
-}
-
-/** ZSTD_buildBlockEntropyStats_sequences() :
- *  Builds entropy for the sequences.
- *  Stores symbol compression modes and fse table to fseMetadata.
- *  Requires ENTROPY_WORKSPACE_SIZE wksp.
- *  @return : size of fse tables or error code */
-static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
-                                              const ZSTD_fseCTables_t* prevEntropy,
-                                                    ZSTD_fseCTables_t* nextEntropy,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                    void* workspace, size_t wkspSize)
-{
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    BYTE* const ostart = fseMetadata->fseTablesBuffer;
-    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-    BYTE* op = ostart;
-    unsigned* countWorkspace = (unsigned*)workspace;
-    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
-    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
-    ZSTD_symbolEncodingTypeStats_t stats;
-
-    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
-    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
-                                          prevEntropy, nextEntropy, op, oend,
-                                          strategy, countWorkspace,
-                                          entropyWorkspace, entropyWorkspaceSize)
-                       : ZSTD_buildDummySequencesStatistics(nextEntropy);
-    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
-    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
-    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
-    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
-    fseMetadata->lastCountSize = stats.lastCountSize;
-    return stats.size;
-}
-
-
-/** ZSTD_buildBlockEntropyStats() :
- *  Builds entropy for the block.
- *  Requires workspace size ENTROPY_WORKSPACE_SIZE
- *
- *  @return : 0 on success or error code
- */
-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
-                             const ZSTD_entropyCTables_t* prevEntropy,
-                                   ZSTD_entropyCTables_t* nextEntropy,
-                             const ZSTD_CCtx_params* cctxParams,
-                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                   void* workspace, size_t wkspSize)
-{
-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-    entropyMetadata->hufMetadata.hufDesSize =
-        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
-                                            &prevEntropy->huf, &nextEntropy->huf,
-                                            &entropyMetadata->hufMetadata,
-                                            ZSTD_literalsCompressionIsDisabled(cctxParams),
-                                            workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
-    entropyMetadata->fseMetadata.fseTablesSize =
-        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                              cctxParams,
-                                              &entropyMetadata->fseMetadata,
-                                              workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
-    return 0;
-}
-
-/* Returns the size estimate for the literals section (header + content) of a block */
-static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
-                                                const ZSTD_hufCTables_t* huf,
-                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                void* workspace, size_t wkspSize,
-                                                int writeEntropy)
-{
-    unsigned* const countWksp = (unsigned*)workspace;
-    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
-    U32 singleStream = litSize < 256;
-
-    if (hufMetadata->hType == set_basic) return litSize;
-    else if (hufMetadata->hType == set_rle) return 1;
-    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
-        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
-        if (ZSTD_isError(largest)) return litSize;
-        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
-            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
-            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
-            return cLitSizeEstimate + literalSectionHeaderSize;
-    }   }
-    assert(0); /* impossible */
-    return 0;
-}
-
-/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
-static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
-                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
-                        const FSE_CTable* fseCTable,
-                        const U8* additionalBits,
-                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                        void* workspace, size_t wkspSize)
-{
-    unsigned* const countWksp = (unsigned*)workspace;
-    const BYTE* ctp = codeTable;
-    const BYTE* const ctStart = ctp;
-    const BYTE* const ctEnd = ctStart + nbSeq;
-    size_t cSymbolTypeSizeEstimateInBits = 0;
-    unsigned max = maxCode;
-
-    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-    if (type == set_basic) {
-        /* We selected this encoding type, so it must be valid. */
-        assert(max <= defaultMax);
-        (void)defaultMax;
-        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
-    } else if (type == set_rle) {
-        cSymbolTypeSizeEstimateInBits = 0;
-    } else if (type == set_compressed || type == set_repeat) {
-        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
-    }
-    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
-        return nbSeq * 10;
-    }
-    while (ctp < ctEnd) {
-        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
-        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
-        ctp++;
-    }
-    return cSymbolTypeSizeEstimateInBits >> 3;
-}
-
-/* Returns the size estimate for the sequences section (header + content) of a block */
-static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
-                                                  const BYTE* llCodeTable,
-                                                  const BYTE* mlCodeTable,
-                                                  size_t nbSeq,
-                                                  const ZSTD_fseCTables_t* fseTables,
-                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                  void* workspace, size_t wkspSize,
-                                                  int writeEntropy)
-{
-    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
-    size_t cSeqSizeEstimate = 0;
-    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
-                                         fseTables->offcodeCTable, NULL,
-                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                         workspace, wkspSize);
-    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
-                                         fseTables->litlengthCTable, LL_bits,
-                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                         workspace, wkspSize);
-    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
-                                         fseTables->matchlengthCTable, ML_bits,
-                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                         workspace, wkspSize);
-    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
-    return cSeqSizeEstimate + sequencesSectionHeaderSize;
-}
-
-/* Returns the size estimate for a given stream of literals, of, ll, ml */
-static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
-                                     const BYTE* ofCodeTable,
-                                     const BYTE* llCodeTable,
-                                     const BYTE* mlCodeTable,
-                                     size_t nbSeq,
-                                     const ZSTD_entropyCTables_t* entropy,
-                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                     void* workspace, size_t wkspSize,
-                                     int writeLitEntropy, int writeSeqEntropy) {
-    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
-                                                         workspace, wkspSize, writeLitEntropy);
-    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
-                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
-                                                         workspace, wkspSize, writeSeqEntropy);
-    return seqSize + literalsSize + ZSTD_blockHeaderSize;
-}
-
-/* Builds entropy statistics and uses them for blocksize estimation.
- *
- * Returns the estimated compressed size of the seqStore, or a zstd error.
- */
-static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
-    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
-    DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
-    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
-                    &zc->blockState.prevCBlock->entropy,
-                    &zc->blockState.nextCBlock->entropy,
-                    &zc->appliedParams,
-                    entropyMetadata,
-                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
-    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
-                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
-                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
-                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
-                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
-}
-
-/* Returns literals bytes represented in a seqStore */
-static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
-    size_t literalsBytes = 0;
-    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
-    size_t i;
-    for (i = 0; i < nbSeqs; ++i) {
-        seqDef seq = seqStore->sequencesStart[i];
-        literalsBytes += seq.litLength;
-        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
-            literalsBytes += 0x10000;
-        }
-    }
-    return literalsBytes;
-}
-
-/* Returns match bytes represented in a seqStore */
-static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
-    size_t matchBytes = 0;
-    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
-    size_t i;
-    for (i = 0; i < nbSeqs; ++i) {
-        seqDef seq = seqStore->sequencesStart[i];
-        matchBytes += seq.mlBase + MINMATCH;
-        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
-            matchBytes += 0x10000;
-        }
-    }
-    return matchBytes;
-}
-
-/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
- * Stores the result in resultSeqStore.
- */
-static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
-                               const seqStore_t* originalSeqStore,
-                                     size_t startIdx, size_t endIdx) {
-    BYTE* const litEnd = originalSeqStore->lit;
-    size_t literalsBytes;
-    size_t literalsBytesPreceding = 0;
-
-    *resultSeqStore = *originalSeqStore;
-    if (startIdx > 0) {
-        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
-        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
-    }
-
-    /* Move longLengthPos into the correct position if necessary */
-    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
-        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
-            resultSeqStore->longLengthType = ZSTD_llt_none;
-        } else {
-            resultSeqStore->longLengthPos -= (U32)startIdx;
-        }
-    }
-    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
-    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
-    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
-    resultSeqStore->litStart += literalsBytesPreceding;
-    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
-        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
-        resultSeqStore->lit = litEnd;
-    } else {
-        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
-    }
-    resultSeqStore->llCode += startIdx;
-    resultSeqStore->mlCode += startIdx;
-    resultSeqStore->ofCode += startIdx;
-}
-
-/**
- * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
- * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
- */
-static U32
-ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
-{
-    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
-    assert(STORED_IS_REPCODE(offCode));
-    if (adjustedOffCode == ZSTD_REP_NUM) {
-        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
-        assert(rep[0] > 0);
-        return rep[0] - 1;
-    }
-    return rep[adjustedOffCode];
-}
-
-/**
- * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
- * due to emission of RLE/raw blocks that disturb the offset history,
- * and replaces any repcodes within the seqStore that may be invalid.
- *
- * dRepcodes are updated as would be on the decompression side.
- * cRepcodes are updated exactly in accordance with the seqStore.
- *
- * Note : this function assumes seq->offBase respects the following numbering scheme :
- *        0 : invalid
- *        1-3 : repcode 1-3
- *        4+ : real_offset+3
- */
-static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
-                                          seqStore_t* const seqStore, U32 const nbSeq) {
-    U32 idx = 0;
-    for (; idx < nbSeq; ++idx) {
-        seqDef* const seq = seqStore->sequencesStart + idx;
-        U32 const ll0 = (seq->litLength == 0);
-        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
-        assert(seq->offBase > 0);
-        if (STORED_IS_REPCODE(offCode)) {
-            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
-            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
-            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
-             * the repcode with the offset it actually references, determined by the compression
-             * repcode history.
-             */
-            if (dRawOffset != cRawOffset) {
-                seq->offBase = cRawOffset + ZSTD_REP_NUM;
-            }
-        }
-        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
-         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
-         */
-        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
-        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
-    }
-}
-
-/* ZSTD_compressSeqStore_singleBlock():
- * Compresses a seqStore into a block with a block header, into the buffer dst.
- *
- * Returns the total size of that block (including header) or a ZSTD error code.
- */
-static size_t
-ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
-                                  repcodes_t* const dRep, repcodes_t* const cRep,
-                                  void* dst, size_t dstCapacity,
-                                  const void* src, size_t srcSize,
-                                  U32 lastBlock, U32 isPartition)
-{
-    const U32 rleMaxLength = 25;
-    BYTE* op = (BYTE*)dst;
-    const BYTE* ip = (const BYTE*)src;
-    size_t cSize;
-    size_t cSeqsSize;
-
-    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
-    repcodes_t const dRepOriginal = *dRep;
-    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
-    if (isPartition)
-        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
-
-    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
-    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
-                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
-                &zc->appliedParams,
-                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
-                srcSize,
-                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
-                zc->bmi2);
-    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
-
-    if (!zc->isFirstBlock &&
-        cSeqsSize < rleMaxLength &&
-        ZSTD_isRLE((BYTE const*)src, srcSize)) {
-        /* We don't want to emit our first block as a RLE even if it qualifies because
-        * doing so will cause the decoder (cli only) to throw a "should consume all input error."
-        * This is only an issue for zstd <= v1.4.3
-        */
-        cSeqsSize = 1;
-    }
-
-    if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
-        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-        return 0;
-    }
-
-    if (cSeqsSize == 0) {
-        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
-        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
-        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
-        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
-    } else if (cSeqsSize == 1) {
-        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
-        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
-        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
-        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
-    } else {
-        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
-        cSize = ZSTD_blockHeaderSize + cSeqsSize;
-        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
-    }
-
-    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
-
-    return cSize;
-}
-
-/* Struct to keep track of where we are in our recursive calls. */
-typedef struct {
-    U32* splitLocations;    /* Array of split indices */
-    size_t idx;             /* The current index within splitLocations being worked on */
-} seqStoreSplits;
-
-#define MIN_SEQUENCES_BLOCK_SPLITTING 300
-
-/* Helper function to perform the recursive search for block splits.
- * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
- * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
- * we do not recurse.
- *
- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
- * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
- * In practice, recursion depth usually doesn't go beyond 4.
- *
- * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
- * maximum of 128 KB, this value is actually impossible to reach.
- */
-static void
-ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
-                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
-{
-    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
-    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
-    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
-    size_t estimatedOriginalSize;
-    size_t estimatedFirstHalfSize;
-    size_t estimatedSecondHalfSize;
-    size_t midIdx = (startIdx + endIdx)/2;
-
-    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
-        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
-        return;
-    }
-    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
-    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
-    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
-    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
-    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
-    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
-    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
-    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
-             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
-    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
-        return;
-    }
-    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
-        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
-        splits->splitLocations[splits->idx] = (U32)midIdx;
-        splits->idx++;
-        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
-    }
-}
-
-/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
- *
- * Returns the number of splits made (which equals the size of the partition table - 1).
- */
-static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
-    seqStoreSplits splits = {partitions, 0};
-    if (nbSeq <= 4) {
-        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
-        /* Refuse to try and split anything with less than 4 sequences */
-        return 0;
-    }
-    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
-    splits.splitLocations[splits.idx] = nbSeq;
-    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
-    return splits.idx;
-}
-
-/* ZSTD_compressBlock_splitBlock():
- * Attempts to split a given block into multiple blocks to improve compression ratio.
- *
- * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
- */
-static size_t
-ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
-                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
-{
-    size_t cSize = 0;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    size_t i = 0;
-    size_t srcBytesTotal = 0;
-    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
-    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
-    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
-    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
-
-    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
-     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
-     * separate repcode histories that simulate repcode history on compression and decompression side,
-     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
-     *
-     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
-     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
-     *    a nocompress/RLE block.
-     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
-     *    the replacement offset value rather than the original repcode to update the repcode history.
-     *    dRep also will be the final repcode history sent to the next block.
-     *
-     * See ZSTD_seqStore_resolveOffCodes() for more details.
-     */
-    repcodes_t dRep;
-    repcodes_t cRep;
-    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
-
-    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
-                (unsigned)zc->blockState.matchState.nextToUpdate);
-
-    if (numSplits == 0) {
-        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
-                                                                   &dRep, &cRep,
-                                                                    op, dstCapacity,
-                                                                    ip, blockSize,
-                                                                    lastBlock, 0 /* isPartition */);
-        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
-        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
-        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
-        return cSizeSingleBlock;
-    }
-
-    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
-    for (i = 0; i <= numSplits; ++i) {
-        size_t srcBytes;
-        size_t cSizeChunk;
-        U32 const lastPartition = (i == numSplits);
-        U32 lastBlockEntireSrc = 0;
-
-        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
-        srcBytesTotal += srcBytes;
-        if (lastPartition) {
-            /* This is the final partition, need to account for possible last literals */
-            srcBytes += blockSize - srcBytesTotal;
-            lastBlockEntireSrc = lastBlock;
-        } else {
-            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
-        }
-
-        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
-                                                      &dRep, &cRep,
-                                                       op, dstCapacity,
-                                                       ip, srcBytes,
-                                                       lastBlockEntireSrc, 1 /* isPartition */);
-        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
-        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
-
-        ip += srcBytes;
-        op += cSizeChunk;
-        dstCapacity -= cSizeChunk;
-        cSize += cSizeChunk;
-        *currSeqStore = *nextSeqStore;
-        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
-    }
-    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
-     * for the next block.
-     */
-    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
-    return cSize;
-}
-
-static size_t
-ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
-                              void* dst, size_t dstCapacity,
-                              const void* src, size_t srcSize, U32 lastBlock)
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    U32 nbSeq;
-    size_t cSize;
-    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
-    assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);
-
-    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
-        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
-        if (bss == ZSTDbss_noCompress) {
-            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
-            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
-            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
-            return cSize;
-        }
-        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
-    }
-
-    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
-    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
-    return cSize;
-}
-
-static size_t
-ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                            void* dst, size_t dstCapacity,
-                            const void* src, size_t srcSize, U32 frame)
-{
-    /* This the upper bound for the length of an rle block.
-     * This isn't the actual upper bound. Finding the real threshold
-     * needs further investigation.
-     */
-    const U32 rleMaxLength = 25;
-    size_t cSize;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
-                (unsigned)zc->blockState.matchState.nextToUpdate);
-
-    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
-        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
-        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
-    }
-
-    if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
-        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-        return 0;
-    }
-
-    /* encode sequences and literals */
-    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
-            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
-            &zc->appliedParams,
-            dst, dstCapacity,
-            srcSize,
-            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
-            zc->bmi2);
-
-    if (frame &&
-        /* We don't want to emit our first block as a RLE even if it qualifies because
-         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
-         * This is only an issue for zstd <= v1.4.3
-         */
-        !zc->isFirstBlock &&
-        cSize < rleMaxLength &&
-        ZSTD_isRLE(ip, srcSize))
-    {
-        cSize = 1;
-        op[0] = ip[0];
-    }
-
-out:
-    if (!ZSTD_isError(cSize) && cSize > 1) {
-        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-    }
-    /* We check that dictionaries have offset codes available for the first
-     * block. After the first block, the offcode table might not have large
-     * enough codes to represent the offsets in the data.
-     */
-    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
-
-    return cSize;
-}
-
-static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
-                               void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                               const size_t bss, U32 lastBlock)
-{
-    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
-    if (bss == ZSTDbss_compress) {
-        if (/* We don't want to emit our first block as a RLE even if it qualifies because
-            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
-            * This is only an issue for zstd <= v1.4.3
-            */
-            !zc->isFirstBlock &&
-            ZSTD_maybeRLE(&zc->seqStore) &&
-            ZSTD_isRLE((BYTE const*)src, srcSize))
-        {
-            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
-        }
-        /* Attempt superblock compression.
-         *
-         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
-         * standard ZSTD_compressBound(). This is a problem, because even if we have
-         * space now, taking an extra byte now could cause us to run out of space later
-         * and violate ZSTD_compressBound().
-         *
-         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
-         *
-         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
-         * uncompressed block in these cases:
-         *   * cSize == 0: Return code for an uncompressed block.
-         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
-         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
-         *     output space.
-         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
-         *     emit an uncompressed block.
-         */
-        {
-            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
-            if (cSize != ERROR(dstSize_tooSmall)) {
-                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
-                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
-                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
-                    return cSize;
-                }
-            }
-        }
-    }
-
-    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
-    /* Superblock compression failed, attempt to emit a single no compress block.
-     * The decoder will be able to stream this block since it is uncompressed.
-     */
-    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
-}
-
-static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
-                               void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                               U32 lastBlock)
-{
-    size_t cSize = 0;
-    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
-    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
-                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
-    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
-
-    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
-    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
-
-    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
-
-    return cSize;
-}
-
-static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
-                                         ZSTD_cwksp* ws,
-                                         ZSTD_CCtx_params const* params,
-                                         void const* ip,
-                                         void const* iend)
-{
-    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
-    U32 const maxDist = (U32)1 << params->cParams.windowLog;
-    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
-        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
-        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-        ZSTD_cwksp_mark_tables_dirty(ws);
-        ZSTD_reduceIndex(ms, params, correction);
-        ZSTD_cwksp_mark_tables_clean(ws);
-        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-        else ms->nextToUpdate -= correction;
-        /* invalidate dictionaries on overflow correction */
-        ms->loadedDictEnd = 0;
-        ms->dictMatchState = NULL;
-    }
-}
-
-/*! ZSTD_compress_frameChunk() :
-*   Compress a chunk of data into one or multiple blocks.
-*   All blocks will be terminated, all input will be consumed.
-*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
-*   Frame is supposed already started (header already produced)
-*   @return : compressed size, or an error code
-*/
-static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
-                                     void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                                     U32 lastFrameChunk)
-{
-    size_t blockSize = cctx->blockSize;
-    size_t remaining = srcSize;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* op = ostart;
-    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
-
-    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
-
-    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
-    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
-        XXH64_update(&cctx->xxhState, src, srcSize);
-
-    while (remaining) {
-        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
-        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
-
-        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
-                        dstSize_tooSmall,
-                        "not enough space to store compressed block");
-        if (remaining < blockSize) blockSize = remaining;
-
-        ZSTD_overflowCorrectIfNeeded(
-            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
-        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
-        ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
-
-        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
-        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
-
-        {   size_t cSize;
-            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
-                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
-                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
-                assert(cSize > 0);
-                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
-            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
-                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
-                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
-                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
-            } else {
-                cSize = ZSTD_compressBlock_internal(cctx,
-                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
-                                        ip, blockSize, 1 /* frame */);
-                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
-
-                if (cSize == 0) {  /* block is not compressible */
-                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
-                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-                } else {
-                    U32 const cBlockHeader = cSize == 1 ?
-                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
-                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
-                    MEM_writeLE24(op, cBlockHeader);
-                    cSize += ZSTD_blockHeaderSize;
-                }
-            }
-
-
-            ip += blockSize;
-            assert(remaining >= blockSize);
-            remaining -= blockSize;
-            op += cSize;
-            assert(dstCapacity >= cSize);
-            dstCapacity -= cSize;
-            cctx->isFirstBlock = 0;
-            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
-                        (unsigned)cSize);
-    }   }
-
-    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
-    return (size_t)(op-ostart);
-}
-
-
-static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
-                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
-{   BYTE* const op = (BYTE*)dst;
-    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
-    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
-    U32   const checksumFlag = params->fParams.checksumFlag>0;
-    U32   const windowSize = (U32)1 << params->cParams.windowLog;
-    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
-    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
-    U32   const fcsCode = params->fParams.contentSizeFlag ?
-                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
-    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
-    size_t pos=0;
-
-    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
-    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
-                    "dst buf is too small to fit worst-case frame header size.");
-    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
-                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
-    if (params->format == ZSTD_f_zstd1) {
-        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
-        pos = 4;
-    }
-    op[pos++] = frameHeaderDescriptionByte;
-    if (!singleSegment) op[pos++] = windowLogByte;
-    switch(dictIDSizeCode)
-    {
-        default:
-            assert(0); /* impossible */
-            ZSTD_FALLTHROUGH;
-        case 0 : break;
-        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
-        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
-        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
-    }
-    switch(fcsCode)
-    {
-        default:
-            assert(0); /* impossible */
-            ZSTD_FALLTHROUGH;
-        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
-        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
-        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
-        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
-    }
-    return pos;
-}
-
-/* ZSTD_writeSkippableFrame_advanced() :
- * Writes out a skippable frame with the specified magic number variant (16 are supported),
- * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
- *
- * Returns the total number of bytes written, or a ZSTD error code.
- */
-size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
-                                const void* src, size_t srcSize, unsigned magicVariant) {
-    BYTE* op = (BYTE*)dst;
-    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
-                    dstSize_tooSmall, "Not enough room for skippable frame");
-    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
-    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
-
-    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
-    MEM_writeLE32(op+4, (U32)srcSize);
-    ZSTD_memcpy(op+8, src, srcSize);
-    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
-}
-
-/* ZSTD_writeLastEmptyBlock() :
- * output an empty Block with end-of-frame mark to complete a frame
- * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
- *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
- */
-size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
-{
-    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
-                    "dst buf is too small to write frame trailer empty block.");
-    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
-        MEM_writeLE24(dst, cBlockHeader24);
-        return ZSTD_blockHeaderSize;
-    }
-}
-
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
-{
-    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
-                    "wrong cctx stage");
-    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
-                    parameter_unsupported,
-                    "incompatible with ldm");
-    cctx->externSeqStore.seq = seq;
-    cctx->externSeqStore.size = nbSeq;
-    cctx->externSeqStore.capacity = nbSeq;
-    cctx->externSeqStore.pos = 0;
-    cctx->externSeqStore.posInSequence = 0;
-    return 0;
-}
-
-
-static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize,
-                               U32 frame, U32 lastFrameChunk)
-{
-    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
-    size_t fhSize = 0;
-
-    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
-                cctx->stage, (unsigned)srcSize);
-    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
-                    "missing init (ZSTD_compressBegin)");
-
-    if (frame && (cctx->stage==ZSTDcs_init)) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
-                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
-        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
-        assert(fhSize <= dstCapacity);
-        dstCapacity -= fhSize;
-        dst = (char*)dst + fhSize;
-        cctx->stage = ZSTDcs_ongoing;
-    }
-
-    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
-
-    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
-        ms->forceNonContiguous = 0;
-        ms->nextToUpdate = ms->window.dictLimit;
-    }
-    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
-        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
-    }
-
-    if (!frame) {
-        /* overflow check and correction for block mode */
-        ZSTD_overflowCorrectIfNeeded(
-            ms, &cctx->workspace, &cctx->appliedParams,
-            src, (BYTE const*)src + srcSize);
-    }
-
-    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
-    {   size_t const cSize = frame ?
-                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
-                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
-        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
-        cctx->consumedSrcSize += srcSize;
-        cctx->producedCSize += (cSize + fhSize);
-        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
-        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
-            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
-            RETURN_ERROR_IF(
-                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
-                srcSize_wrong,
-                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
-                (unsigned)cctx->pledgedSrcSizePlusOne-1,
-                (unsigned)cctx->consumedSrcSize);
-        }
-        return cSize + fhSize;
-    }
-}
-
-size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
-}
-
-
-size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
-{
-    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
-    assert(!ZSTD_checkCParams(cParams));
-    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
-}
-
-size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
-    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
-      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
-
-    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
-}
-
-/*! ZSTD_loadDictionaryContent() :
- *  @return : 0, or an error code
- */
-static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
-                                         ldmState_t* ls,
-                                         ZSTD_cwksp* ws,
-                                         ZSTD_CCtx_params const* params,
-                                         const void* src, size_t srcSize,
-                                         ZSTD_dictTableLoadMethod_e dtlm)
-{
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* const iend = ip + srcSize;
-    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
-
-    /* Assert that we the ms params match the params we're being given */
-    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
-
-    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
-        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
-         * Dictionaries right at the edge will immediately trigger overflow
-         * correction, but I don't want to insert extra constraints here.
-         */
-        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
-        /* We must have cleared our windows when our source is this large. */
-        assert(ZSTD_window_isEmpty(ms->window));
-        if (loadLdmDict)
-            assert(ZSTD_window_isEmpty(ls->window));
-        /* If the dictionary is too large, only load the suffix of the dictionary. */
-        if (srcSize > maxDictSize) {
-            ip = iend - maxDictSize;
-            src = ip;
-            srcSize = maxDictSize;
-        }
-    }
-
-    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
-    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
-    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
-    ms->forceNonContiguous = params->deterministicRefPrefix;
-
-    if (loadLdmDict) {
-        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
-        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
-    }
-
-    if (srcSize <= HASH_READ_SIZE) return 0;
-
-    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
-
-    if (loadLdmDict)
-        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
-
-    switch(params->cParams.strategy)
-    {
-    case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, dtlm);
-        break;
-    case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
-        break;
-
-    case ZSTD_greedy:
-    case ZSTD_lazy:
-    case ZSTD_lazy2:
-        assert(srcSize >= HASH_READ_SIZE);
-        if (ms->dedicatedDictSearch) {
-            assert(ms->chainTable != NULL);
-            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
-        } else {
-            assert(params->useRowMatchFinder != ZSTD_ps_auto);
-            if (params->useRowMatchFinder == ZSTD_ps_enable) {
-                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
-                ZSTD_memset(ms->tagTable, 0, tagTableSize);
-                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
-                DEBUGLOG(4, "Using row-based hash table for lazy dict");
-            } else {
-                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
-                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
-            }
-        }
-        break;
-
-    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-    case ZSTD_btopt:
-    case ZSTD_btultra:
-    case ZSTD_btultra2:
-        assert(srcSize >= HASH_READ_SIZE);
-        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
-        break;
-
-    default:
-        assert(0);  /* not possible : not a valid strategy id */
-    }
-
-    ms->nextToUpdate = (U32)(iend - ms->window.base);
-    return 0;
-}
-
-
-/* Dictionaries that assign zero probability to symbols that show up causes problems
- * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
- * and only dictionaries with 100% valid symbols can be assumed valid.
- */
-static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
-{
-    U32 s;
-    if (dictMaxSymbolValue < maxSymbolValue) {
-        return FSE_repeat_check;
-    }
-    for (s = 0; s <= maxSymbolValue; ++s) {
-        if (normalizedCounter[s] == 0) {
-            return FSE_repeat_check;
-        }
-    }
-    return FSE_repeat_valid;
-}
-
-size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
-                         const void* const dict, size_t dictSize)
-{
-    short offcodeNCount[MaxOff+1];
-    unsigned offcodeMaxValue = MaxOff;
-    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
-    const BYTE* const dictEnd = dictPtr + dictSize;
-    dictPtr += 8;
-    bs->entropy.huf.repeatMode = HUF_repeat_check;
-
-    {   unsigned maxSymbolValue = 255;
-        unsigned hasZeroWeights = 1;
-        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
-            dictEnd-dictPtr, &hasZeroWeights);
-
-        /* We only set the loaded table as valid if it contains all non-zero
-         * weights. Otherwise, we set it to check */
-        if (!hasZeroWeights)
-            bs->entropy.huf.repeatMode = HUF_repeat_valid;
-
-        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
-        dictPtr += hufHeaderSize;
-    }
-
-    {   unsigned offcodeLog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
-        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
-        /* fill all offset symbols to avoid garbage at end of table */
-        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
-                bs->entropy.fse.offcodeCTable,
-                offcodeNCount, MaxOff, offcodeLog,
-                workspace, HUF_WORKSPACE_SIZE)),
-            dictionary_corrupted, "");
-        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-        dictPtr += offcodeHeaderSize;
-    }
-
-    {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
-        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
-        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
-                bs->entropy.fse.matchlengthCTable,
-                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
-                workspace, HUF_WORKSPACE_SIZE)),
-            dictionary_corrupted, "");
-        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
-        dictPtr += matchlengthHeaderSize;
-    }
-
-    {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
-        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
-        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
-                bs->entropy.fse.litlengthCTable,
-                litlengthNCount, litlengthMaxValue, litlengthLog,
-                workspace, HUF_WORKSPACE_SIZE)),
-            dictionary_corrupted, "");
-        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
-        dictPtr += litlengthHeaderSize;
-    }
-
-    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
-    bs->rep[0] = MEM_readLE32(dictPtr+0);
-    bs->rep[1] = MEM_readLE32(dictPtr+4);
-    bs->rep[2] = MEM_readLE32(dictPtr+8);
-    dictPtr += 12;
-
-    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-        U32 offcodeMax = MaxOff;
-        if (dictContentSize <= ((U32)-1) - 128 KB) {
-            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
-            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
-        }
-        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
-        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
-
-        /* All repCodes must be <= dictContentSize and != 0 */
-        {   U32 u;
-            for (u=0; u<3; u++) {
-                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
-                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
-    }   }   }
-
-    return dictPtr - (const BYTE*)dict;
-}
-
-/* Dictionary format :
- * See :
- * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
- */
-/*! ZSTD_loadZstdDictionary() :
- * @return : dictID, or an error code
- *  assumptions : magic number supposed already checked
- *                dictSize supposed >= 8
- */
-static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
-                                      ZSTD_matchState_t* ms,
-                                      ZSTD_cwksp* ws,
-                                      ZSTD_CCtx_params const* params,
-                                      const void* dict, size_t dictSize,
-                                      ZSTD_dictTableLoadMethod_e dtlm,
-                                      void* workspace)
-{
-    const BYTE* dictPtr = (const BYTE*)dict;
-    const BYTE* const dictEnd = dictPtr + dictSize;
-    size_t dictID;
-    size_t eSize;
-    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
-    assert(dictSize >= 8);
-    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
-
-    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
-    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
-    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
-    dictPtr += eSize;
-
-    {
-        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
-            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
-    }
-    return dictID;
-}
-
-/** ZSTD_compress_insertDictionary() :
-*   @return : dictID, or an error code */
-static size_t
-ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
-                               ZSTD_matchState_t* ms,
-                               ldmState_t* ls,
-                               ZSTD_cwksp* ws,
-                         const ZSTD_CCtx_params* params,
-                         const void* dict, size_t dictSize,
-                               ZSTD_dictContentType_e dictContentType,
-                               ZSTD_dictTableLoadMethod_e dtlm,
-                               void* workspace)
-{
-    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
-    if ((dict==NULL) || (dictSize<8)) {
-        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
-        return 0;
-    }
-
-    ZSTD_reset_compressedBlockState(bs);
-
-    /* dict restricted modes */
-    if (dictContentType == ZSTD_dct_rawContent)
-        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
-
-    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
-        if (dictContentType == ZSTD_dct_auto) {
-            DEBUGLOG(4, "raw content dictionary detected");
-            return ZSTD_loadDictionaryContent(
-                ms, ls, ws, params, dict, dictSize, dtlm);
-        }
-        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
-        assert(0);   /* impossible */
-    }
-
-    /* dict as full zstd dictionary */
-    return ZSTD_loadZstdDictionary(
-        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
-}
-
-#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
-#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
-
-/*! ZSTD_compressBegin_internal() :
- * @return : 0, or an error code */
-static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
-                                    const void* dict, size_t dictSize,
-                                    ZSTD_dictContentType_e dictContentType,
-                                    ZSTD_dictTableLoadMethod_e dtlm,
-                                    const ZSTD_CDict* cdict,
-                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
-                                    ZSTD_buffered_policy_e zbuff)
-{
-    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
-#if ZSTD_TRACE
-    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
-#endif
-    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
-    /* params are supposed to be fully validated at this point */
-    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
-    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-    if ( (cdict)
-      && (cdict->dictContentSize > 0)
-      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
-        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
-        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
-        || cdict->compressionLevel == 0)
-      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
-        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
-    }
-
-    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
-                                     dictContentSize,
-                                     ZSTDcrp_makeClean, zbuff) , "");
-    {   size_t const dictID = cdict ?
-                ZSTD_compress_insertDictionary(
-                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
-                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
-                        cdict->dictContentSize, cdict->dictContentType, dtlm,
-                        cctx->entropyWorkspace)
-              : ZSTD_compress_insertDictionary(
-                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
-                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
-                        dictContentType, dtlm, cctx->entropyWorkspace);
-        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
-        assert(dictID <= UINT_MAX);
-        cctx->dictID = (U32)dictID;
-        cctx->dictContentSize = dictContentSize;
-    }
-    return 0;
-}
-
-size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
-                                    const void* dict, size_t dictSize,
-                                    ZSTD_dictContentType_e dictContentType,
-                                    ZSTD_dictTableLoadMethod_e dtlm,
-                                    const ZSTD_CDict* cdict,
-                                    const ZSTD_CCtx_params* params,
-                                    unsigned long long pledgedSrcSize)
-{
-    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
-    /* compression parameters verification and optimization */
-    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
-    return ZSTD_compressBegin_internal(cctx,
-                                       dict, dictSize, dictContentType, dtlm,
-                                       cdict,
-                                       params, pledgedSrcSize,
-                                       ZSTDb_not_buffered);
-}
-
-/*! ZSTD_compressBegin_advanced() :
-*   @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
-                             const void* dict, size_t dictSize,
-                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams;
-    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
-    return ZSTD_compressBegin_advanced_internal(cctx,
-                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
-                                            NULL /*cdict*/,
-                                            &cctxParams, pledgedSrcSize);
-}
-
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
-{
-    ZSTD_CCtx_params cctxParams;
-    {
-        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
-        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
-    }
-    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
-                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
-}
-
-size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
-{
-    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
-}
-
-
-/*! ZSTD_writeEpilogue() :
-*   Ends a frame.
-*   @return : nb of bytes written into dst (or an error code) */
-static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* op = ostart;
-    size_t fhSize = 0;
-
-    DEBUGLOG(4, "ZSTD_writeEpilogue");
-    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
-
-    /* special case : empty frame */
-    if (cctx->stage == ZSTDcs_init) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
-        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
-        dstCapacity -= fhSize;
-        op += fhSize;
-        cctx->stage = ZSTDcs_ongoing;
-    }
-
-    if (cctx->stage != ZSTDcs_ending) {
-        /* write one last empty block, make it the "last" block */
-        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
-        MEM_writeLE32(op, cBlockHeader24);
-        op += ZSTD_blockHeaderSize;
-        dstCapacity -= ZSTD_blockHeaderSize;
-    }
-
-    if (cctx->appliedParams.fParams.checksumFlag) {
-        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
-        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
-        MEM_writeLE32(op, checksum);
-        op += 4;
-    }
-
-    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
-    return op-ostart;
-}
-
-void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
-{
-#if ZSTD_TRACE
-    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
-        int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
-        ZSTD_Trace trace;
-        ZSTD_memset(&trace, 0, sizeof(trace));
-        trace.version = ZSTD_VERSION_NUMBER;
-        trace.streaming = streaming;
-        trace.dictionaryID = cctx->dictID;
-        trace.dictionarySize = cctx->dictContentSize;
-        trace.uncompressedSize = cctx->consumedSrcSize;
-        trace.compressedSize = cctx->producedCSize + extraCSize;
-        trace.params = &cctx->appliedParams;
-        trace.cctx = cctx;
-        ZSTD_trace_compress_end(cctx->traceCtx, &trace);
-    }
-    cctx->traceCtx = 0;
-#else
-    (void)cctx;
-    (void)extraCSize;
-#endif
-}
-
-size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
-                         void* dst, size_t dstCapacity,
-                   const void* src, size_t srcSize)
-{
-    size_t endResult;
-    size_t const cSize = ZSTD_compressContinue_internal(cctx,
-                                dst, dstCapacity, src, srcSize,
-                                1 /* frame mode */, 1 /* last chunk */);
-    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
-    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
-    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
-    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
-    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
-        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
-        DEBUGLOG(4, "end of frame : controlling src size");
-        RETURN_ERROR_IF(
-            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
-            srcSize_wrong,
-             "error : pledgedSrcSize = %u, while realSrcSize = %u",
-            (unsigned)cctx->pledgedSrcSizePlusOne-1,
-            (unsigned)cctx->consumedSrcSize);
-    }
-    ZSTD_CCtx_trace(cctx, endResult);
-    return cSize + endResult;
-}
-
-size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
-                               void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize,
-                         const void* dict,size_t dictSize,
-                               ZSTD_parameters params)
-{
-    DEBUGLOG(4, "ZSTD_compress_advanced");
-    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
-    return ZSTD_compress_advanced_internal(cctx,
-                                           dst, dstCapacity,
-                                           src, srcSize,
-                                           dict, dictSize,
-                                           &cctx->simpleApiParams);
-}
-
-/* Internal */
-size_t ZSTD_compress_advanced_internal(
-        ZSTD_CCtx* cctx,
-        void* dst, size_t dstCapacity,
-        const void* src, size_t srcSize,
-        const void* dict,size_t dictSize,
-        const ZSTD_CCtx_params* params)
-{
-    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
-    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
-                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
-                         params, srcSize, ZSTDb_not_buffered) , "");
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
-                               void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize,
-                         const void* dict, size_t dictSize,
-                               int compressionLevel)
-{
-    {
-        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
-        assert(params.fParams.contentSizeFlag == 1);
-        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
-    }
-    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
-}
-
-size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
-                         void* dst, size_t dstCapacity,
-                   const void* src, size_t srcSize,
-                         int compressionLevel)
-{
-    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
-    assert(cctx != NULL);
-    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
-}
-
-size_t ZSTD_compress(void* dst, size_t dstCapacity,
-               const void* src, size_t srcSize,
-                     int compressionLevel)
-{
-    size_t result;
-#if ZSTD_COMPRESS_HEAPMODE
-    ZSTD_CCtx* cctx = ZSTD_createCCtx();
-    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
-    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
-    ZSTD_freeCCtx(cctx);
-#else
-    ZSTD_CCtx ctxBody;
-    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
-    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
-    ZSTD_freeCCtxContent(&ctxBody);   /* can't free ctxBody itself, as it's on stack; free only heap content */
-#endif
-    return result;
-}
-
-
-/* =====  Dictionary API  ===== */
-
-/*! ZSTD_estimateCDictSize_advanced() :
- *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
-size_t ZSTD_estimateCDictSize_advanced(
-        size_t dictSize, ZSTD_compressionParameters cParams,
-        ZSTD_dictLoadMethod_e dictLoadMethod)
-{
-    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
-    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
-         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
-          * in case we are using DDS with row-hash. */
-         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
-                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
-         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
-            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
-}
-
-size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
-{
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
-    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
-}
-
-size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
-{
-    if (cdict==NULL) return 0;   /* support sizeof on NULL */
-    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
-    /* cdict may be in the workspace */
-    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
-        + ZSTD_cwksp_sizeof(&cdict->workspace);
-}
-
-static size_t ZSTD_initCDict_internal(
-                    ZSTD_CDict* cdict,
-              const void* dictBuffer, size_t dictSize,
-                    ZSTD_dictLoadMethod_e dictLoadMethod,
-                    ZSTD_dictContentType_e dictContentType,
-                    ZSTD_CCtx_params params)
-{
-    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
-    assert(!ZSTD_checkCParams(params.cParams));
-    cdict->matchState.cParams = params.cParams;
-    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
-        cdict->dictContent = dictBuffer;
-    } else {
-         void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
-        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
-        cdict->dictContent = internalBuffer;
-        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
-    }
-    cdict->dictContentSize = dictSize;
-    cdict->dictContentType = dictContentType;
-
-    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
-
-
-    /* Reset the state to no dictionary */
-    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
-    FORWARD_IF_ERROR(ZSTD_reset_matchState(
-        &cdict->matchState,
-        &cdict->workspace,
-        &params.cParams,
-        params.useRowMatchFinder,
-        ZSTDcrp_makeClean,
-        ZSTDirp_reset,
-        ZSTD_resetTarget_CDict), "");
-    /* (Maybe) load the dictionary
-     * Skips loading the dictionary if it is < 8 bytes.
-     */
-    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
-        params.fParams.contentSizeFlag = 1;
-        {   size_t const dictID = ZSTD_compress_insertDictionary(
-                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
-                    &params, cdict->dictContent, cdict->dictContentSize,
-                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
-            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
-            assert(dictID <= (size_t)(U32)-1);
-            cdict->dictID = (U32)dictID;
-        }
-    }
-
-    return 0;
-}
-
-static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
-                                      ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_compressionParameters cParams,
-                                      ZSTD_paramSwitch_e useRowMatchFinder,
-                                      U32 enableDedicatedDictSearch,
-                                      ZSTD_customMem customMem)
-{
-    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
-
-    {   size_t const workspaceSize =
-            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
-            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
-            (dictLoadMethod == ZSTD_dlm_byRef ? 0
-             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
-        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
-        ZSTD_cwksp ws;
-        ZSTD_CDict* cdict;
-
-        if (!workspace) {
-            ZSTD_customFree(workspace, customMem);
-            return NULL;
-        }
-
-        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
-
-        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
-        assert(cdict != NULL);
-        ZSTD_cwksp_move(&cdict->workspace, &ws);
-        cdict->customMem = customMem;
-        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-        cdict->useRowMatchFinder = useRowMatchFinder;
-        return cdict;
-    }
-}
-
-ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
-                                      ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_dictContentType_e dictContentType,
-                                      ZSTD_compressionParameters cParams,
-                                      ZSTD_customMem customMem)
-{
-    ZSTD_CCtx_params cctxParams;
-    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
-    ZSTD_CCtxParams_init(&cctxParams, 0);
-    cctxParams.cParams = cParams;
-    cctxParams.customMem = customMem;
-    return ZSTD_createCDict_advanced2(
-        dictBuffer, dictSize,
-        dictLoadMethod, dictContentType,
-        &cctxParams, customMem);
-}
-
-ZSTD_CDict* ZSTD_createCDict_advanced2(
-        const void* dict, size_t dictSize,
-        ZSTD_dictLoadMethod_e dictLoadMethod,
-        ZSTD_dictContentType_e dictContentType,
-        const ZSTD_CCtx_params* originalCctxParams,
-        ZSTD_customMem customMem)
-{
-    ZSTD_CCtx_params cctxParams = *originalCctxParams;
-    ZSTD_compressionParameters cParams;
-    ZSTD_CDict* cdict;
-
-    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
-    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
-
-    if (cctxParams.enableDedicatedDictSearch) {
-        cParams = ZSTD_dedicatedDictSearch_getCParams(
-            cctxParams.compressionLevel, dictSize);
-        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
-    } else {
-        cParams = ZSTD_getCParamsFromCCtxParams(
-            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
-    }
-
-    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
-        /* Fall back to non-DDSS params */
-        cctxParams.enableDedicatedDictSearch = 0;
-        cParams = ZSTD_getCParamsFromCCtxParams(
-            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
-    }
-
-    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
-    cctxParams.cParams = cParams;
-    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
-
-    cdict = ZSTD_createCDict_advanced_internal(dictSize,
-                        dictLoadMethod, cctxParams.cParams,
-                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
-                        customMem);
-
-    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
-                                    dict, dictSize,
-                                    dictLoadMethod, dictContentType,
-                                    cctxParams) )) {
-        ZSTD_freeCDict(cdict);
-        return NULL;
-    }
-
-    return cdict;
-}
-
-ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
-{
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
-    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
-                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
-                                                  cParams, ZSTD_defaultCMem);
-    if (cdict)
-        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
-    return cdict;
-}
-
-ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
-{
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
-    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
-                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
-                                     cParams, ZSTD_defaultCMem);
-    if (cdict)
-        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
-    return cdict;
-}
-
-size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
-{
-    if (cdict==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = cdict->customMem;
-        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
-        ZSTD_cwksp_free(&cdict->workspace, cMem);
-        if (!cdictInWorkspace) {
-            ZSTD_customFree(cdict, cMem);
-        }
-        return 0;
-    }
-}
-
-/*! ZSTD_initStaticCDict_advanced() :
- *  Generate a digested dictionary in provided memory area.
- *  workspace: The memory area to emplace the dictionary into.
- *             Provided pointer must 8-bytes aligned.
- *             It must outlive dictionary usage.
- *  workspaceSize: Use ZSTD_estimateCDictSize()
- *                 to determine how large workspace must be.
- *  cParams : use ZSTD_getCParams() to transform a compression level
- *            into its relevants cParams.
- * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
- *  Note : there is no corresponding "free" function.
- *         Since workspace was allocated externally, it must be freed externally.
- */
-const ZSTD_CDict* ZSTD_initStaticCDict(
-                                 void* workspace, size_t workspaceSize,
-                           const void* dict, size_t dictSize,
-                                 ZSTD_dictLoadMethod_e dictLoadMethod,
-                                 ZSTD_dictContentType_e dictContentType,
-                                 ZSTD_compressionParameters cParams)
-{
-    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
-    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
-    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
-    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
-                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
-                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
-                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-                            + matchStateSize;
-    ZSTD_CDict* cdict;
-    ZSTD_CCtx_params params;
-
-    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
-
-    {
-        ZSTD_cwksp ws;
-        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
-        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
-        if (cdict == NULL) return NULL;
-        ZSTD_cwksp_move(&cdict->workspace, &ws);
-    }
-
-    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
-        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
-    if (workspaceSize < neededSize) return NULL;
-
-    ZSTD_CCtxParams_init(&params, 0);
-    params.cParams = cParams;
-    params.useRowMatchFinder = useRowMatchFinder;
-    cdict->useRowMatchFinder = useRowMatchFinder;
-
-    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
-                                              dict, dictSize,
-                                              dictLoadMethod, dictContentType,
-                                              params) ))
-        return NULL;
-
-    return cdict;
-}
-
-ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
-{
-    assert(cdict != NULL);
-    return cdict->matchState.cParams;
-}
-
-/*! ZSTD_getDictID_fromCDict() :
- *  Provides the dictID of the dictionary loaded into `cdict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
-{
-    if (cdict==NULL) return 0;
-    return cdict->dictID;
-}
-
-/* ZSTD_compressBegin_usingCDict_internal() :
- * Implementation of various ZSTD_compressBegin_usingCDict* functions.
- */
-static size_t ZSTD_compressBegin_usingCDict_internal(
-    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
-    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
-{
-    ZSTD_CCtx_params cctxParams;
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
-    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
-    /* Initialize the cctxParams from the cdict */
-    {
-        ZSTD_parameters params;
-        params.fParams = fParams;
-        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
-                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
-                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
-                        || cdict->compressionLevel == 0 ) ?
-                ZSTD_getCParamsFromCDict(cdict)
-              : ZSTD_getCParams(cdict->compressionLevel,
-                                pledgedSrcSize,
-                                cdict->dictContentSize);
-        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
-    }
-    /* Increase window log to fit the entire dictionary and source if the
-     * source size is known. Limit the increase to 19, which is the
-     * window log for compression level 1 with the largest source size.
-     */
-    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
-        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
-        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
-    }
-    return ZSTD_compressBegin_internal(cctx,
-                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
-                                        cdict,
-                                        &cctxParams, pledgedSrcSize,
-                                        ZSTDb_not_buffered);
-}
-
-
-/* ZSTD_compressBegin_usingCDict_advanced() :
- * This function is DEPRECATED.
- * cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict_advanced(
-    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
-    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
-{
-    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
-}
-
-/* ZSTD_compressBegin_usingCDict() :
- * cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
-{
-    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
-/*! ZSTD_compress_usingCDict_internal():
- * Implementation of various ZSTD_compress_usingCDict* functions.
- */
-static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                                const void* src, size_t srcSize,
-                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
-{
-    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-/*! ZSTD_compress_usingCDict_advanced():
- * This function is DEPRECATED.
- */
-size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                                const void* src, size_t srcSize,
-                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
-{
-    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
-}
-
-/*! ZSTD_compress_usingCDict() :
- *  Compression using a digested Dictionary.
- *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
- *  Note that compression parameters are decided at CDict creation time
- *  while frame parameters are hardcoded */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                                const void* src, size_t srcSize,
-                                const ZSTD_CDict* cdict)
-{
-    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
-}
-
-
-
-/* ******************************************************************
-*  Streaming
-********************************************************************/
-
-ZSTD_CStream* ZSTD_createCStream(void)
-{
-    DEBUGLOG(3, "ZSTD_createCStream");
-    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
-}
-
-ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
-{
-    return ZSTD_initStaticCCtx(workspace, workspaceSize);
-}
-
-ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{   /* CStream and CCtx are now same object */
-    return ZSTD_createCCtx_advanced(customMem);
-}
-
-size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
-{
-    return ZSTD_freeCCtx(zcs);   /* same object */
-}
-
-
-
-/*======   Initialization   ======*/
-
-size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
-
-size_t ZSTD_CStreamOutSize(void)
-{
-    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
-}
-
-static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
-{
-    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
-        return ZSTD_cpm_attachDict;
-    else
-        return ZSTD_cpm_noAttachDict;
-}
-
-/* ZSTD_resetCStream():
- * pledgedSrcSize == 0 means "unknown" */
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
-{
-    /* temporary : 0 interpreted as "unknown" during transition period.
-     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
-     * 0 will be interpreted as "empty" in the future.
-     */
-    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
-    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
-    return 0;
-}
-
-/*! ZSTD_initCStream_internal() :
- *  Note : for lib/compress only. Used by zstdmt_compress.c.
- *  Assumption 1 : params are valid
- *  Assumption 2 : either dict, or cdict, is defined, not both */
-size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
-                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
-                    const ZSTD_CCtx_params* params,
-                    unsigned long long pledgedSrcSize)
-{
-    DEBUGLOG(4, "ZSTD_initCStream_internal");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
-    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
-    zcs->requestedParams = *params;
-    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-    if (dict) {
-        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
-    } else {
-        /* Dictionary is cleared if !cdict */
-        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
-    }
-    return 0;
-}
-
-/* ZSTD_initCStream_usingCDict_advanced() :
- * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
-size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
-                                            const ZSTD_CDict* cdict,
-                                            ZSTD_frameParameters fParams,
-                                            unsigned long long pledgedSrcSize)
-{
-    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
-    zcs->requestedParams.fParams = fParams;
-    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
-    return 0;
-}
-
-/* note : cdict must outlive compression session */
-size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
-{
-    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
-    return 0;
-}
-
-
-/* ZSTD_initCStream_advanced() :
- * pledgedSrcSize must be exact.
- * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
-size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
-                                 const void* dict, size_t dictSize,
-                                 ZSTD_parameters params, unsigned long long pss)
-{
-    /* for compatibility with older programs relying on this behavior.
-     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
-     * This line will be removed in the future.
-     */
-    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
-    DEBUGLOG(4, "ZSTD_initCStream_advanced");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
-    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
-    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
-    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
-    return 0;
-}
-
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
-{
-    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
-    return 0;
-}
-
-size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
-{
-    /* temporary : 0 interpreted as "unknown" during transition period.
-     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
-     * 0 will be interpreted as "empty" in the future.
-     */
-    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
-    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
-    return 0;
-}
-
-size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
-{
-    DEBUGLOG(4, "ZSTD_initCStream");
-    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
-    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
-    return 0;
-}
-
-/*======   Compression   ======*/
-
-static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
-{
-    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
-    if (hintInSize==0) hintInSize = cctx->blockSize;
-    return hintInSize;
-}
-
-/** ZSTD_compressStream_generic():
- *  internal function for all *compressStream*() variants
- *  non-static, because can be called from zstdmt_compress.c
- * @return : hint size for next input */
-static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                                          ZSTD_outBuffer* output,
-                                          ZSTD_inBuffer* input,
-                                          ZSTD_EndDirective const flushMode)
-{
-    const char* const istart = (const char*)input->src;
-    const char* const iend = input->size != 0 ? istart + input->size : istart;
-    const char* ip = input->pos != 0 ? istart + input->pos : istart;
-    char* const ostart = (char*)output->dst;
-    char* const oend = output->size != 0 ? ostart + output->size : ostart;
-    char* op = output->pos != 0 ? ostart + output->pos : ostart;
-    U32 someMoreWork = 1;
-
-    /* check expectations */
-    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
-    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
-        assert(zcs->inBuff != NULL);
-        assert(zcs->inBuffSize > 0);
-    }
-    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
-        assert(zcs->outBuff !=  NULL);
-        assert(zcs->outBuffSize > 0);
-    }
-    assert(output->pos <= output->size);
-    assert(input->pos <= input->size);
-    assert((U32)flushMode <= (U32)ZSTD_e_end);
-
-    while (someMoreWork) {
-        switch(zcs->streamStage)
-        {
-        case zcss_init:
-            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
-
-        case zcss_load:
-            if ( (flushMode == ZSTD_e_end)
-              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
-                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
-              && (zcs->inBuffPos == 0) ) {
-                /* shortcut to compression pass directly into output buffer */
-                size_t const cSize = ZSTD_compressEnd(zcs,
-                                                op, oend-op, ip, iend-ip);
-                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
-                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
-                ip = iend;
-                op += cSize;
-                zcs->frameEnded = 1;
-                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-                someMoreWork = 0; break;
-            }
-            /* complete loading into inBuffer in buffered mode */
-            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
-                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-                size_t const loaded = ZSTD_limitCopy(
-                                        zcs->inBuff + zcs->inBuffPos, toLoad,
-                                        ip, iend-ip);
-                zcs->inBuffPos += loaded;
-                if (loaded != 0)
-                    ip += loaded;
-                if ( (flushMode == ZSTD_e_continue)
-                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
-                    /* not enough input to fill full block : stop here */
-                    someMoreWork = 0; break;
-                }
-                if ( (flushMode == ZSTD_e_flush)
-                  && (zcs->inBuffPos == zcs->inToCompress) ) {
-                    /* empty */
-                    someMoreWork = 0; break;
-                }
-            }
-            /* compress current block (note : this stage cannot be stopped in the middle) */
-            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
-            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
-                void* cDst;
-                size_t cSize;
-                size_t oSize = oend-op;
-                size_t const iSize = inputBuffered
-                    ? zcs->inBuffPos - zcs->inToCompress
-                    : MIN((size_t)(iend - ip), zcs->blockSize);
-                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
-                    cDst = op;   /* compress into output buffer, to skip flush stage */
-                else
-                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-                if (inputBuffered) {
-                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
-                    cSize = lastBlock ?
-                            ZSTD_compressEnd(zcs, cDst, oSize,
-                                        zcs->inBuff + zcs->inToCompress, iSize) :
-                            ZSTD_compressContinue(zcs, cDst, oSize,
-                                        zcs->inBuff + zcs->inToCompress, iSize);
-                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
-                    zcs->frameEnded = lastBlock;
-                    /* prepare next block */
-                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
-                    if (zcs->inBuffTarget > zcs->inBuffSize)
-                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
-                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
-                            (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
-                    if (!lastBlock)
-                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
-                    zcs->inToCompress = zcs->inBuffPos;
-                } else {
-                    unsigned const lastBlock = (ip + iSize == iend);
-                    assert(flushMode == ZSTD_e_end /* Already validated */);
-                    cSize = lastBlock ?
-                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
-                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
-                    /* Consume the input prior to error checking to mirror buffered mode. */
-                    if (iSize > 0)
-                        ip += iSize;
-                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
-                    zcs->frameEnded = lastBlock;
-                    if (lastBlock)
-                        assert(ip == iend);
-                }
-                if (cDst == op) {  /* no need to flush */
-                    op += cSize;
-                    if (zcs->frameEnded) {
-                        DEBUGLOG(5, "Frame completed directly in outBuffer");
-                        someMoreWork = 0;
-                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-                    }
-                    break;
-                }
-                zcs->outBuffContentSize = cSize;
-                zcs->outBuffFlushedSize = 0;
-                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
-            }
-	    ZSTD_FALLTHROUGH;
-        case zcss_flush:
-            DEBUGLOG(5, "flush stage");
-            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
-            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
-                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
-                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
-                if (flushed)
-                    op += flushed;
-                zcs->outBuffFlushedSize += flushed;
-                if (toFlush!=flushed) {
-                    /* flush not fully completed, presumably because dst is too small */
-                    assert(op==oend);
-                    someMoreWork = 0;
-                    break;
-                }
-                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-                if (zcs->frameEnded) {
-                    DEBUGLOG(5, "Frame completed on flush");
-                    someMoreWork = 0;
-                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
-                    break;
-                }
-                zcs->streamStage = zcss_load;
-                break;
-            }
-
-        default: /* impossible */
-            assert(0);
-        }
-    }
-
-    input->pos = ip - istart;
-    output->pos = op - ostart;
-    if (zcs->frameEnded) return 0;
-    return ZSTD_nextInputSizeHint(zcs);
-}
-
-static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
-{
-#ifdef ZSTD_MULTITHREAD
-    if (cctx->appliedParams.nbWorkers >= 1) {
-        assert(cctx->mtctx != NULL);
-        return ZSTDMT_nextInputSizeHint(cctx->mtctx);
-    }
-#endif
-    return ZSTD_nextInputSizeHint(cctx);
-
-}
-
-size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
-{
-    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
-    return ZSTD_nextInputSizeHint_MTorST(zcs);
-}
-
-/* After a compression call set the expected input/output buffer.
- * This is validated at the start of the next compression call.
- */
-static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
-{
-    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
-        cctx->expectedInBuffer = *input;
-    }
-    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
-        cctx->expectedOutBufferSize = output->size - output->pos;
-    }
-}
-
-/* Validate that the input/output buffers match the expectations set by
- * ZSTD_setBufferExpectations.
- */
-static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
-                                        ZSTD_outBuffer const* output,
-                                        ZSTD_inBuffer const* input,
-                                        ZSTD_EndDirective endOp)
-{
-    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
-        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
-        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
-            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
-        if (endOp != ZSTD_e_end)
-            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
-    }
-    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
-        size_t const outBufferSize = output->size - output->pos;
-        if (cctx->expectedOutBufferSize != outBufferSize)
-            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
-    }
-    return 0;
-}
-
-static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
-                                             ZSTD_EndDirective endOp,
-                                             size_t inSize) {
-    ZSTD_CCtx_params params = cctx->requestedParams;
-    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
-    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
-    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
-    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-    if (cctx->cdict && !cctx->localDict.cdict) {
-        /* Let the cdict's compression level take priority over the requested params.
-         * But do not take the cdict's compression level if the "cdict" is actually a localDict
-         * generated from ZSTD_initLocalDict().
-         */
-        params.compressionLevel = cctx->cdict->compressionLevel;
-    }
-    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
-    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
-    {
-        size_t const dictSize = prefixDict.dict
-                ? prefixDict.dictSize
-                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
-        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
-        params.cParams = ZSTD_getCParamsFromCCtxParams(
-                &params, cctx->pledgedSrcSizePlusOne-1,
-                dictSize, mode);
-    }
-
-    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
-    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
-    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
-
-#ifdef ZSTD_MULTITHREAD
-    if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
-        params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
-    }
-    if (params.nbWorkers > 0) {
-#if ZSTD_TRACE
-        cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
-#endif
-        /* mt context creation */
-        if (cctx->mtctx == NULL) {
-            DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
-                        params.nbWorkers);
-            cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool);
-            RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
-        }
-        /* mt compression */
-        DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
-        FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
-                    cctx->mtctx,
-                    prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
-                    cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
-        cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0;
-        cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize;
-        cctx->consumedSrcSize = 0;
-        cctx->producedCSize = 0;
-        cctx->streamStage = zcss_load;
-        cctx->appliedParams = params;
-    } else
-#endif
-    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
-        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
-        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
-                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
-                cctx->cdict,
-                &params, pledgedSrcSize,
-                ZSTDb_buffered) , "");
-        assert(cctx->appliedParams.nbWorkers == 0);
-        cctx->inToCompress = 0;
-        cctx->inBuffPos = 0;
-        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
-            /* for small input: avoid automatic flush on reaching end of block, since
-            * it would require to add a 3-bytes null block to end frame
-            */
-            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
-        } else {
-            cctx->inBuffTarget = 0;
-        }
-        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
-        cctx->streamStage = zcss_load;
-        cctx->frameEnded = 0;
-    }
-    return 0;
-}
-
-size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
-                             ZSTD_outBuffer* output,
-                             ZSTD_inBuffer* input,
-                             ZSTD_EndDirective endOp)
-{
-    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
-    /* check conditions */
-    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
-    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
-    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
-    assert(cctx != NULL);
-
-    /* transparent initialization stage */
-    if (cctx->streamStage == zcss_init) {
-        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
-        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
-    }
-    /* end of transparent initialization stage */
-
-    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
-    /* compression stage */
-#ifdef ZSTD_MULTITHREAD
-    if (cctx->appliedParams.nbWorkers > 0) {
-        size_t flushMin;
-        if (cctx->cParamsChanged) {
-            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
-            cctx->cParamsChanged = 0;
-        }
-        for (;;) {
-            size_t const ipos = input->pos;
-            size_t const opos = output->pos;
-            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
-            cctx->consumedSrcSize += (U64)(input->pos - ipos);
-            cctx->producedCSize += (U64)(output->pos - opos);
-            if ( ZSTD_isError(flushMin)
-              || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
-                if (flushMin == 0)
-                    ZSTD_CCtx_trace(cctx, 0);
-                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
-            }
-            FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
-
-            if (endOp == ZSTD_e_continue) {
-                /* We only require some progress with ZSTD_e_continue, not maximal progress.
-                 * We're done if we've consumed or produced any bytes, or either buffer is
-                 * full.
-                 */
-                if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size)
-                    break;
-            } else {
-                assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
-                /* We require maximal progress. We're done when the flush is complete or the
-                 * output buffer is full.
-                 */
-                if (flushMin == 0 || output->pos == output->size)
-                    break;
-            }
-        }
-        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
-        /* Either we don't require maximum forward progress, we've finished the
-         * flush, or we are out of output space.
-         */
-        assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size);
-        ZSTD_setBufferExpectations(cctx, output, input);
-        return flushMin;
-    }
-#endif
-    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
-    DEBUGLOG(5, "completed ZSTD_compressStream2");
-    ZSTD_setBufferExpectations(cctx, output, input);
-    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
-}
-
-size_t ZSTD_compressStream2_simpleArgs (
-                            ZSTD_CCtx* cctx,
-                            void* dst, size_t dstCapacity, size_t* dstPos,
-                      const void* src, size_t srcSize, size_t* srcPos,
-                            ZSTD_EndDirective endOp)
-{
-    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
-    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
-    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
-    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
-    *dstPos = output.pos;
-    *srcPos = input.pos;
-    return cErr;
-}
-
-size_t ZSTD_compress2(ZSTD_CCtx* cctx,
-                      void* dst, size_t dstCapacity,
-                      const void* src, size_t srcSize)
-{
-    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
-    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
-    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
-    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
-    /* Enable stable input/output buffers. */
-    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
-    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
-    {   size_t oPos = 0;
-        size_t iPos = 0;
-        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
-                                        dst, dstCapacity, &oPos,
-                                        src, srcSize, &iPos,
-                                        ZSTD_e_end);
-        /* Reset to the original values. */
-        cctx->requestedParams.inBufferMode = originalInBufferMode;
-        cctx->requestedParams.outBufferMode = originalOutBufferMode;
-        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
-        if (result != 0) {  /* compression not completed, due to lack of output space */
-            assert(oPos == dstCapacity);
-            RETURN_ERROR(dstSize_tooSmall, "");
-        }
-        assert(iPos == srcSize);   /* all input is expected consumed */
-        return oPos;
-    }
-}
-
-typedef struct {
-    U32 idx;             /* Index in array of ZSTD_Sequence */
-    U32 posInSequence;   /* Position within sequence at idx */
-    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
-} ZSTD_sequencePosition;
-
-/* ZSTD_validateSequence() :
- * @offCode : is presumed to follow format required by ZSTD_storeSeq()
- * @returns a ZSTD error code if sequence is not valid
- */
-static size_t
-ZSTD_validateSequence(U32 offCode, U32 matchLength,
-                      size_t posInSrc, U32 windowLog, size_t dictSize)
-{
-    U32 const windowSize = 1 << windowLog;
-    /* posInSrc represents the amount of data the the decoder would decode up to this point.
-     * As long as the amount of data decoded is less than or equal to window size, offsets may be
-     * larger than the total length of output decoded in order to reference the dict, even larger than
-     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
-     */
-    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
-    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
-    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
-    return 0;
-}
-
-/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
-static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
-{
-    U32 offCode = STORE_OFFSET(rawOffset);
-
-    if (!ll0 && rawOffset == rep[0]) {
-        offCode = STORE_REPCODE_1;
-    } else if (rawOffset == rep[1]) {
-        offCode = STORE_REPCODE(2 - ll0);
-    } else if (rawOffset == rep[2]) {
-        offCode = STORE_REPCODE(3 - ll0);
-    } else if (ll0 && rawOffset == rep[0] - 1) {
-        offCode = STORE_REPCODE_3;
-    }
-    return offCode;
-}
-
-/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
- */
-static size_t
-ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
-                                              ZSTD_sequencePosition* seqPos,
-                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                        const void* src, size_t blockSize)
-{
-    U32 idx = seqPos->idx;
-    BYTE const* ip = (BYTE const*)(src);
-    const BYTE* const iend = ip + blockSize;
-    repcodes_t updatedRepcodes;
-    U32 dictSize;
-
-    if (cctx->cdict) {
-        dictSize = (U32)cctx->cdict->dictContentSize;
-    } else if (cctx->prefixDict.dict) {
-        dictSize = (U32)cctx->prefixDict.dictSize;
-    } else {
-        dictSize = 0;
-    }
-    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
-        U32 const litLength = inSeqs[idx].litLength;
-        U32 const ll0 = (litLength == 0);
-        U32 const matchLength = inSeqs[idx].matchLength;
-        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
-        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
-
-        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
-        if (cctx->appliedParams.validateSequences) {
-            seqPos->posInSrc += litLength + matchLength;
-            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize),
-                                                "Sequence validation failed");
-        }
-        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
-                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
-        ip += matchLength + litLength;
-    }
-    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
-
-    if (inSeqs[idx].litLength) {
-        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
-        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
-        ip += inSeqs[idx].litLength;
-        seqPos->posInSrc += inSeqs[idx].litLength;
-    }
-    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
-    seqPos->idx = idx+1;
-    return 0;
-}
-
-/* Returns the number of bytes to move the current read position back by. Only non-zero
- * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
- * went wrong.
- *
- * This function will attempt to scan through blockSize bytes represented by the sequences
- * in inSeqs, storing any (partial) sequences.
- *
- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
- * avoid splitting a match, or to avoid splitting a match such that it would produce a match
- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
- */
-static size_t
-ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                   const void* src, size_t blockSize)
-{
-    U32 idx = seqPos->idx;
-    U32 startPosInSequence = seqPos->posInSequence;
-    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
-    size_t dictSize;
-    BYTE const* ip = (BYTE const*)(src);
-    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
-    repcodes_t updatedRepcodes;
-    U32 bytesAdjustment = 0;
-    U32 finalMatchSplit = 0;
-
-    if (cctx->cdict) {
-        dictSize = cctx->cdict->dictContentSize;
-    } else if (cctx->prefixDict.dict) {
-        dictSize = cctx->prefixDict.dictSize;
-    } else {
-        dictSize = 0;
-    }
-    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
-    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
-    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
-        const ZSTD_Sequence currSeq = inSeqs[idx];
-        U32 litLength = currSeq.litLength;
-        U32 matchLength = currSeq.matchLength;
-        U32 const rawOffset = currSeq.offset;
-        U32 offCode;
-
-        /* Modify the sequence depending on where endPosInSequence lies */
-        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
-            if (startPosInSequence >= litLength) {
-                startPosInSequence -= litLength;
-                litLength = 0;
-                matchLength -= startPosInSequence;
-            } else {
-                litLength -= startPosInSequence;
-            }
-            /* Move to the next sequence */
-            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
-            startPosInSequence = 0;
-            idx++;
-        } else {
-            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
-               does not reach the end of the match. So, we have to split the sequence */
-            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
-                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
-            if (endPosInSequence > litLength) {
-                U32 firstHalfMatchLength;
-                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
-                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
-                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
-                    /* Only ever split the match if it is larger than the block size */
-                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
-                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
-                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
-                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
-                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
-                        firstHalfMatchLength -= bytesAdjustment;
-                    }
-                    matchLength = firstHalfMatchLength;
-                    /* Flag that we split the last match - after storing the sequence, exit the loop,
-                       but keep the value of endPosInSequence */
-                    finalMatchSplit = 1;
-                } else {
-                    /* Move the position in sequence backwards so that we don't split match, and break to store
-                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
-                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
-                     * would cause the first half of the match to be too small
-                     */
-                    bytesAdjustment = endPosInSequence - currSeq.litLength;
-                    endPosInSequence = currSeq.litLength;
-                    break;
-                }
-            } else {
-                /* This sequence ends inside the literals, break to store the last literals */
-                break;
-            }
-        }
-        /* Check if this offset can be represented with a repcode */
-        {   U32 const ll0 = (litLength == 0);
-            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
-            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
-        }
-
-        if (cctx->appliedParams.validateSequences) {
-            seqPos->posInSrc += litLength + matchLength;
-            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize),
-                                                   "Sequence validation failed");
-        }
-        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
-        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
-                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
-        ip += matchLength + litLength;
-    }
-    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
-    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
-    seqPos->idx = idx;
-    seqPos->posInSequence = endPosInSequence;
-    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
-
-    iend -= bytesAdjustment;
-    if (ip != iend) {
-        /* Store any last literals */
-        U32 lastLLSize = (U32)(iend - ip);
-        assert(ip <= iend);
-        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
-        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
-        seqPos->posInSrc += lastLLSize;
-    }
-
-    return bytesAdjustment;
-}
-
-typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                       const void* src, size_t blockSize);
-static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
-{
-    ZSTD_sequenceCopier sequenceCopier = NULL;
-    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
-    if (mode == ZSTD_sf_explicitBlockDelimiters) {
-        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
-    } else if (mode == ZSTD_sf_noBlockDelimiters) {
-        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
-    }
-    assert(sequenceCopier != NULL);
-    return sequenceCopier;
-}
-
-/* Compress, block-by-block, all of the sequences given.
- *
- * Returns the cumulative size of all compressed blocks (including their headers),
- * otherwise a ZSTD error.
- */
-static size_t
-ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                          const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                          const void* src, size_t srcSize)
-{
-    size_t cSize = 0;
-    U32 lastBlock;
-    size_t blockSize;
-    size_t compressedSeqsSize;
-    size_t remaining = srcSize;
-    ZSTD_sequencePosition seqPos = {0, 0, 0};
-
-    BYTE const* ip = (BYTE const*)src;
-    BYTE* op = (BYTE*)dst;
-    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
-
-    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
-    /* Special case: empty frame */
-    if (remaining == 0) {
-        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
-        MEM_writeLE32(op, cBlockHeader24);
-        op += ZSTD_blockHeaderSize;
-        dstCapacity -= ZSTD_blockHeaderSize;
-        cSize += ZSTD_blockHeaderSize;
-    }
-
-    while (remaining) {
-        size_t cBlockSize;
-        size_t additionalByteAdjustment;
-        lastBlock = remaining <= cctx->blockSize;
-        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
-        ZSTD_resetSeqStore(&cctx->seqStore);
-        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
-
-        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
-        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
-        blockSize -= additionalByteAdjustment;
-
-        /* If blocks are too small, emit as a nocompress block */
-        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
-            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
-            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
-            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
-            cSize += cBlockSize;
-            ip += blockSize;
-            op += cBlockSize;
-            remaining -= blockSize;
-            dstCapacity -= cBlockSize;
-            continue;
-        }
-
-        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
-                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
-                                &cctx->appliedParams,
-                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
-                                blockSize,
-                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
-                                cctx->bmi2);
-        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
-        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
-
-        if (!cctx->isFirstBlock &&
-            ZSTD_maybeRLE(&cctx->seqStore) &&
-            ZSTD_isRLE((BYTE const*)src, srcSize)) {
-            /* We don't want to emit our first block as a RLE even if it qualifies because
-            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
-            * This is only an issue for zstd <= v1.4.3
-            */
-            compressedSeqsSize = 1;
-        }
-
-        if (compressedSeqsSize == 0) {
-            /* ZSTD_noCompressBlock writes the block header as well */
-            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
-            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
-            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
-        } else if (compressedSeqsSize == 1) {
-            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
-            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
-            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
-        } else {
-            U32 cBlockHeader;
-            /* Error checking and repcodes update */
-            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
-            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
-                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
-
-            /* Write block header into beginning of block*/
-            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
-            MEM_writeLE24(op, cBlockHeader);
-            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
-            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
-        }
-
-        cSize += cBlockSize;
-        DEBUGLOG(4, "cSize running total: %zu", cSize);
-
-        if (lastBlock) {
-            break;
-        } else {
-            ip += blockSize;
-            op += cBlockSize;
-            remaining -= blockSize;
-            dstCapacity -= cBlockSize;
-            cctx->isFirstBlock = 0;
-        }
-    }
-
-    return cSize;
-}
-
-size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
-                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                              const void* src, size_t srcSize)
-{
-    BYTE* op = (BYTE*)dst;
-    size_t cSize = 0;
-    size_t compressedBlocksSize = 0;
-    size_t frameHeaderSize = 0;
-
-    /* Transparent initialization stage, same as compressStream2() */
-    DEBUGLOG(3, "ZSTD_compressSequences()");
-    assert(cctx != NULL);
-    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
-    /* Begin writing output, starting with frame header */
-    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
-    op += frameHeaderSize;
-    dstCapacity -= frameHeaderSize;
-    cSize += frameHeaderSize;
-    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
-        XXH64_update(&cctx->xxhState, src, srcSize);
-    }
-    /* cSize includes block header size and compressed sequences size */
-    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
-                                                           op, dstCapacity,
-                                                           inSeqs, inSeqsSize,
-                                                           src, srcSize);
-    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
-    cSize += compressedBlocksSize;
-    dstCapacity -= compressedBlocksSize;
-
-    if (cctx->appliedParams.fParams.checksumFlag) {
-        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
-        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
-        MEM_writeLE32((char*)dst + cSize, checksum);
-        cSize += 4;
-    }
-
-    DEBUGLOG(3, "Final compressed size: %zu", cSize);
-    return cSize;
-}
-
-/*======   Finalize   ======*/
-
-/*! ZSTD_flushStream() :
- * @return : amount of data remaining to flush */
-size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
-{
-    ZSTD_inBuffer input = { NULL, 0, 0 };
-    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
-}
-
-
-size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
-{
-    ZSTD_inBuffer input = { NULL, 0, 0 };
-    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
-    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
-    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
-    /* single thread mode : attempt to calculate remaining to flush more precisely */
-    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
-        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
-        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
-        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
-        return toFlush;
-    }
-}
-
-
-/*-=====  Pre-defined compression levels  =====-*/
-#include "clevels.h"
-
-int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
-int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
-int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
-
-static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
-{
-    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
-    switch (cParams.strategy) {
-        case ZSTD_fast:
-        case ZSTD_dfast:
-            break;
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
-            break;
-        case ZSTD_btlazy2:
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            break;
-    }
-    return cParams;
-}
-
-static int ZSTD_dedicatedDictSearch_isSupported(
-        ZSTD_compressionParameters const* cParams)
-{
-    return (cParams->strategy >= ZSTD_greedy)
-        && (cParams->strategy <= ZSTD_lazy2)
-        && (cParams->hashLog > cParams->chainLog)
-        && (cParams->chainLog <= 24);
-}
-
-/**
- * Reverses the adjustment applied to cparams when enabling dedicated dict
- * search. This is used to recover the params set to be used in the working
- * context. (Otherwise, those tables would also grow.)
- */
-static void ZSTD_dedicatedDictSearch_revertCParams(
-        ZSTD_compressionParameters* cParams) {
-    switch (cParams->strategy) {
-        case ZSTD_fast:
-        case ZSTD_dfast:
-            break;
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
-            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
-                cParams->hashLog = ZSTD_HASHLOG_MIN;
-            }
-            break;
-        case ZSTD_btlazy2:
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            break;
-    }
-}
-
-static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
-{
-    switch (mode) {
-    case ZSTD_cpm_unknown:
-    case ZSTD_cpm_noAttachDict:
-    case ZSTD_cpm_createCDict:
-        break;
-    case ZSTD_cpm_attachDict:
-        dictSize = 0;
-        break;
-    default:
-        assert(0);
-        break;
-    }
-    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
-        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
-        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
-    }
-}
-
-/*! ZSTD_getCParams_internal() :
- * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
- *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
- *        Use dictSize == 0 for unknown or unused.
- *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
-static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
-{
-    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
-    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
-    int row;
-    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
-
-    /* row */
-    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
-    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
-    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
-    else row = compressionLevel;
-
-    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
-        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
-        /* acceleration factor */
-        if (compressionLevel < 0) {
-            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
-            cp.targetLength = (unsigned)(-clampedCompressionLevel);
-        }
-        /* refine parameters based on srcSize & dictSize */
-        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
-    }
-}
-
-/*! ZSTD_getCParams() :
- * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
- *  Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
-{
-    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
-}
-
-/*! ZSTD_getParams() :
- *  same idea as ZSTD_getCParams()
- * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
- *  Fields of `ZSTD_frameParameters` are set to default values */
-static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
-    ZSTD_parameters params;
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
-    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
-    ZSTD_memset(&params, 0, sizeof(params));
-    params.cParams = cParams;
-    params.fParams.contentSizeFlag = 1;
-    return params;
-}
-
-/*! ZSTD_getParams() :
- *  same idea as ZSTD_getCParams()
- * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
- *  Fields of `ZSTD_frameParameters` are set to default values */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
-    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
-    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
-}
diff --git a/dep/zstd/lib/compress/zstd_compress_internal.h b/dep/zstd/lib/compress/zstd_compress_internal.h
deleted file mode 100644
index c406e794b..000000000
--- a/dep/zstd/lib/compress/zstd_compress_internal.h
+++ /dev/null
@@ -1,1458 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* This header contains definitions
- * that shall **only** be used by modules within lib/compress.
- */
-
-#ifndef ZSTD_COMPRESS_H
-#define ZSTD_COMPRESS_H
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "../common/zstd_internal.h"
-#include "zstd_cwksp.h"
-#ifdef ZSTD_MULTITHREAD
-#  include "zstdmt_compress.h"
-#endif
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-*  Constants
-***************************************/
-#define kSearchStrength      8
-#define HASH_READ_SIZE       8
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
-                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
-                                       It's not a big deal though : candidate will just be sorted again.
-                                       Additionally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
-                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
-
-
-/*-*************************************
-*  Context memory management
-***************************************/
-typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
-typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
-
-typedef struct ZSTD_prefixDict_s {
-    const void* dict;
-    size_t dictSize;
-    ZSTD_dictContentType_e dictContentType;
-} ZSTD_prefixDict;
-
-typedef struct {
-    void* dictBuffer;
-    void const* dict;
-    size_t dictSize;
-    ZSTD_dictContentType_e dictContentType;
-    ZSTD_CDict* cdict;
-} ZSTD_localDict;
-
-typedef struct {
-    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
-    HUF_repeat repeatMode;
-} ZSTD_hufCTables_t;
-
-typedef struct {
-    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-    FSE_repeat offcode_repeatMode;
-    FSE_repeat matchlength_repeatMode;
-    FSE_repeat litlength_repeatMode;
-} ZSTD_fseCTables_t;
-
-typedef struct {
-    ZSTD_hufCTables_t huf;
-    ZSTD_fseCTables_t fse;
-} ZSTD_entropyCTables_t;
-
-/***********************************************
-*  Entropy buffer statistics structs and funcs *
-***********************************************/
-/** ZSTD_hufCTablesMetadata_t :
- *  Stores Literals Block Type for a super-block in hType, and
- *  huffman tree description in hufDesBuffer.
- *  hufDesSize refers to the size of huffman tree description in bytes.
- *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
-typedef struct {
-    symbolEncodingType_e hType;
-    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
-    size_t hufDesSize;
-} ZSTD_hufCTablesMetadata_t;
-
-/** ZSTD_fseCTablesMetadata_t :
- *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
- *  fse tables in fseTablesBuffer.
- *  fseTablesSize refers to the size of fse tables in bytes.
- *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
-typedef struct {
-    symbolEncodingType_e llType;
-    symbolEncodingType_e ofType;
-    symbolEncodingType_e mlType;
-    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
-    size_t fseTablesSize;
-    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
-} ZSTD_fseCTablesMetadata_t;
-
-typedef struct {
-    ZSTD_hufCTablesMetadata_t hufMetadata;
-    ZSTD_fseCTablesMetadata_t fseMetadata;
-} ZSTD_entropyCTablesMetadata_t;
-
-/** ZSTD_buildBlockEntropyStats() :
- *  Builds entropy for the block.
- *  @return : 0 on success or error code */
-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
-                             const ZSTD_entropyCTables_t* prevEntropy,
-                                   ZSTD_entropyCTables_t* nextEntropy,
-                             const ZSTD_CCtx_params* cctxParams,
-                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                   void* workspace, size_t wkspSize);
-
-/*********************************
-*  Compression internals structs *
-*********************************/
-
-typedef struct {
-    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
-    U32 len;            /* Raw length of match */
-} ZSTD_match_t;
-
-typedef struct {
-    U32 offset;         /* Offset of sequence */
-    U32 litLength;      /* Length of literals prior to match */
-    U32 matchLength;    /* Raw length of match */
-} rawSeq;
-
-typedef struct {
-  rawSeq* seq;          /* The start of the sequences */
-  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
-  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
-                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
-  size_t size;          /* The number of sequences. <= capacity. */
-  size_t capacity;      /* The capacity starting from `seq` pointer */
-} rawSeqStore_t;
-
-UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
-
-typedef struct {
-    int price;
-    U32 off;
-    U32 mlen;
-    U32 litlen;
-    U32 rep[ZSTD_REP_NUM];
-} ZSTD_optimal_t;
-
-typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
-
-typedef struct {
-    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
-    unsigned* litFreq;           /* table of literals statistics, of size 256 */
-    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
-    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
-    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
-    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
-    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
-
-    U32  litSum;                 /* nb of literals */
-    U32  litLengthSum;           /* nb of litLength codes */
-    U32  matchLengthSum;         /* nb of matchLength codes */
-    U32  offCodeSum;             /* nb of offset codes */
-    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
-    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
-    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
-    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
-    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
-    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
-    ZSTD_paramSwitch_e literalCompressionMode;
-} optState_t;
-
-typedef struct {
-  ZSTD_entropyCTables_t entropy;
-  U32 rep[ZSTD_REP_NUM];
-} ZSTD_compressedBlockState_t;
-
-typedef struct {
-    BYTE const* nextSrc;       /* next block here to continue on current prefix */
-    BYTE const* base;          /* All regular indexes relative to this position */
-    BYTE const* dictBase;      /* extDict indexes relative to this position */
-    U32 dictLimit;             /* below that point, need extDict */
-    U32 lowLimit;              /* below that point, no more valid data */
-    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
-                                * ZSTD_window_init(). Useful for debugging coredumps
-                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
-                                */
-} ZSTD_window_t;
-
-#define ZSTD_WINDOW_START_INDEX 2
-
-typedef struct ZSTD_matchState_t ZSTD_matchState_t;
-
-#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
-
-struct ZSTD_matchState_t {
-    ZSTD_window_t window;   /* State for window round buffer management */
-    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
-                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
-                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
-                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
-                             * When dict referential is copied into active context (i.e. not attached),
-                             * loadedDictEnd == dictSize, since referential starts from zero.
-                             */
-    U32 nextToUpdate;       /* index from which to continue table update */
-    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
-
-    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
-    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
-    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
-
-    U32* hashTable;
-    U32* hashTable3;
-    U32* chainTable;
-
-    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
-
-    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
-                               * dedicated dictionary search structure.
-                               */
-    optState_t opt;         /* optimal parser state */
-    const ZSTD_matchState_t* dictMatchState;
-    ZSTD_compressionParameters cParams;
-    const rawSeqStore_t* ldmSeqStore;
-};
-
-typedef struct {
-    ZSTD_compressedBlockState_t* prevCBlock;
-    ZSTD_compressedBlockState_t* nextCBlock;
-    ZSTD_matchState_t matchState;
-} ZSTD_blockState_t;
-
-typedef struct {
-    U32 offset;
-    U32 checksum;
-} ldmEntry_t;
-
-typedef struct {
-    BYTE const* split;
-    U32 hash;
-    U32 checksum;
-    ldmEntry_t* bucket;
-} ldmMatchCandidate_t;
-
-#define LDM_BATCH_SIZE 64
-
-typedef struct {
-    ZSTD_window_t window;   /* State for the window round buffer management */
-    ldmEntry_t* hashTable;
-    U32 loadedDictEnd;
-    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
-    size_t splitIndices[LDM_BATCH_SIZE];
-    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
-} ldmState_t;
-
-typedef struct {
-    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
-    U32 hashLog;            /* Log size of hashTable */
-    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
-    U32 minMatchLength;     /* Minimum match length */
-    U32 hashRateLog;       /* Log number of entries to skip */
-    U32 windowLog;          /* Window log for the LDM */
-} ldmParams_t;
-
-typedef struct {
-    int collectSequences;
-    ZSTD_Sequence* seqStart;
-    size_t seqIndex;
-    size_t maxSequences;
-} SeqCollector;
-
-struct ZSTD_CCtx_params_s {
-    ZSTD_format_e format;
-    ZSTD_compressionParameters cParams;
-    ZSTD_frameParameters fParams;
-
-    int compressionLevel;
-    int forceWindow;           /* force back-references to respect limit of
-                                * 1<<wLog, even for dictionary */
-    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
-                                * No target when targetCBlockSize == 0.
-                                * There is no guarantee on compressed block size */
-    int srcSizeHint;           /* User's best guess of source size.
-                                * Hint is not valid when srcSizeHint == 0.
-                                * There is no guarantee that hint is close to actual source size */
-
-    ZSTD_dictAttachPref_e attachDictPref;
-    ZSTD_paramSwitch_e literalCompressionMode;
-
-    /* Multithreading: used to pass parameters to mtctx */
-    int nbWorkers;
-    size_t jobSize;
-    int overlapLog;
-    int rsyncable;
-
-    /* Long distance matching parameters */
-    ldmParams_t ldmParams;
-
-    /* Dedicated dict search algorithm trigger */
-    int enableDedicatedDictSearch;
-
-    /* Input/output buffer modes */
-    ZSTD_bufferMode_e inBufferMode;
-    ZSTD_bufferMode_e outBufferMode;
-
-    /* Sequence compression API */
-    ZSTD_sequenceFormat_e blockDelimiters;
-    int validateSequences;
-
-    /* Block splitting */
-    ZSTD_paramSwitch_e useBlockSplitter;
-
-    /* Param for deciding whether to use row-based matchfinder */
-    ZSTD_paramSwitch_e useRowMatchFinder;
-
-    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
-    int deterministicRefPrefix;
-
-    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
-    ZSTD_customMem customMem;
-};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
-
-#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
-#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
-
-/**
- * Indicates whether this compression proceeds directly from user-provided
- * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
- * whether the context needs to buffer the input/output (ZSTDb_buffered).
- */
-typedef enum {
-    ZSTDb_not_buffered,
-    ZSTDb_buffered
-} ZSTD_buffered_policy_e;
-
-/**
- * Struct that contains all elements of block splitter that should be allocated
- * in a wksp.
- */
-#define ZSTD_MAX_NB_BLOCK_SPLITS 196
-typedef struct {
-    seqStore_t fullSeqStoreChunk;
-    seqStore_t firstHalfSeqStore;
-    seqStore_t secondHalfSeqStore;
-    seqStore_t currSeqStore;
-    seqStore_t nextSeqStore;
-
-    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
-    ZSTD_entropyCTablesMetadata_t entropyMetadata;
-} ZSTD_blockSplitCtx;
-
-struct ZSTD_CCtx_s {
-    ZSTD_compressionStage_e stage;
-    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
-    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
-    ZSTD_CCtx_params requestedParams;
-    ZSTD_CCtx_params appliedParams;
-    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
-    U32   dictID;
-    size_t dictContentSize;
-
-    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
-    size_t blockSize;
-    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
-    unsigned long long consumedSrcSize;
-    unsigned long long producedCSize;
-    XXH64_state_t xxhState;
-    ZSTD_customMem customMem;
-    ZSTD_threadPool* pool;
-    size_t staticSize;
-    SeqCollector seqCollector;
-    int isFirstBlock;
-    int initialized;
-
-    seqStore_t seqStore;      /* sequences storage ptrs */
-    ldmState_t ldmState;      /* long distance matching state */
-    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
-    size_t maxNbLdmSequences;
-    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
-    ZSTD_blockState_t blockState;
-    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
-
-    /* Whether we are streaming or not */
-    ZSTD_buffered_policy_e bufferedPolicy;
-
-    /* streaming */
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inToCompress;
-    size_t inBuffPos;
-    size_t inBuffTarget;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outBuffContentSize;
-    size_t outBuffFlushedSize;
-    ZSTD_cStreamStage streamStage;
-    U32    frameEnded;
-
-    /* Stable in/out buffer verification */
-    ZSTD_inBuffer expectedInBuffer;
-    size_t expectedOutBufferSize;
-
-    /* Dictionary */
-    ZSTD_localDict localDict;
-    const ZSTD_CDict* cdict;
-    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
-
-    /* Multi-threading */
-#ifdef ZSTD_MULTITHREAD
-    ZSTDMT_CCtx* mtctx;
-#endif
-
-    /* Tracing */
-#if ZSTD_TRACE
-    ZSTD_TraceCtx traceCtx;
-#endif
-
-    /* Workspace for block splitter */
-    ZSTD_blockSplitCtx blockSplitCtx;
-};
-
-typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
-
-typedef enum {
-    ZSTD_noDict = 0,
-    ZSTD_extDict = 1,
-    ZSTD_dictMatchState = 2,
-    ZSTD_dedicatedDictSearch = 3
-} ZSTD_dictMode_e;
-
-typedef enum {
-    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
-                                 * In this mode we use both the srcSize and the dictSize
-                                 * when selecting and adjusting parameters.
-                                 */
-    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
-                                 * In this mode we only take the srcSize into account when selecting
-                                 * and adjusting parameters.
-                                 */
-    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
-                                 * In this mode we take both the source size and the dictionary size
-                                 * into account when selecting and adjusting the parameters.
-                                 */
-    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
-                                 * We don't know what these parameters are for. We default to the legacy
-                                 * behavior of taking both the source size and the dict size into account
-                                 * when selecting and adjusting parameters.
-                                 */
-} ZSTD_cParamMode_e;
-
-typedef size_t (*ZSTD_blockCompressor) (
-        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
-
-
-MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
-{
-    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
-                                       8,  9, 10, 11, 12, 13, 14, 15,
-                                      16, 16, 17, 17, 18, 18, 19, 19,
-                                      20, 20, 20, 20, 21, 21, 21, 21,
-                                      22, 22, 22, 22, 22, 22, 22, 22,
-                                      23, 23, 23, 23, 23, 23, 23, 23,
-                                      24, 24, 24, 24, 24, 24, 24, 24,
-                                      24, 24, 24, 24, 24, 24, 24, 24 };
-    static const U32 LL_deltaCode = 19;
-    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-}
-
-/* ZSTD_MLcode() :
- * note : mlBase = matchLength - MINMATCH;
- *        because it's the format it's stored in seqStore->sequences */
-MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
-{
-    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
-                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
-                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
-                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
-    static const U32 ML_deltaCode = 36;
-    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
-}
-
-/* ZSTD_cParam_withinBounds:
- * @return 1 if value is within cParam bounds,
- * 0 otherwise */
-MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
-{
-    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
-    if (ZSTD_isError(bounds.error)) return 0;
-    if (value < bounds.lowerBound) return 0;
-    if (value > bounds.upperBound) return 0;
-    return 1;
-}
-
-/* ZSTD_noCompressBlock() :
- * Writes uncompressed block to dst buffer from given src.
- * Returns the size of the block */
-MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
-{
-    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
-    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
-                    dstSize_tooSmall, "dst buf too small for uncompressed block");
-    MEM_writeLE24(dst, cBlockHeader24);
-    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
-    return ZSTD_blockHeaderSize + srcSize;
-}
-
-MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
-{
-    BYTE* const op = (BYTE*)dst;
-    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
-    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
-    MEM_writeLE24(op, cBlockHeader);
-    op[3] = src;
-    return 4;
-}
-
-
-/* ZSTD_minGain() :
- * minimum compression required
- * to generate a compress block or a compressed literals section.
- * note : use same formula for both situations */
-MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
-{
-    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
-    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
-    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    return (srcSize >> minlog) + 2;
-}
-
-MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
-{
-    switch (cctxParams->literalCompressionMode) {
-    case ZSTD_ps_enable:
-        return 0;
-    case ZSTD_ps_disable:
-        return 1;
-    default:
-        assert(0 /* impossible: pre-validated */);
-        ZSTD_FALLTHROUGH;
-    case ZSTD_ps_auto:
-        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
-    }
-}
-
-/*! ZSTD_safecopyLiterals() :
- *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
- *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
- *  large copies.
- */
-static void
-ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
-{
-    assert(iend > ilimit_w);
-    if (ip <= ilimit_w) {
-        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
-        op += ilimit_w - ip;
-        ip = ilimit_w;
-    }
-    while (ip < iend) *op++ = *ip++;
-}
-
-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
-#define STORE_REPCODE_1 STORE_REPCODE(1)
-#define STORE_REPCODE_2 STORE_REPCODE(2)
-#define STORE_REPCODE_3 STORE_REPCODE(3)
-#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
-#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
-#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
-#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
-#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
-#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
-#define STORED_TO_OFFBASE(o) ((o)+1)
-#define OFFBASE_TO_STORED(o) ((o)-1)
-
-/*! ZSTD_storeSeq() :
- *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
- *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
- *  @matchLength : must be >= MINMATCH
- *  Allowed to overread literals up to litLimit.
-*/
-HINT_INLINE UNUSED_ATTR void
-ZSTD_storeSeq(seqStore_t* seqStorePtr,
-              size_t litLength, const BYTE* literals, const BYTE* litLimit,
-              U32 offBase_minus1,
-              size_t matchLength)
-{
-    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
-    BYTE const* const litEnd = literals + litLength;
-#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
-    static const BYTE* g_start = NULL;
-    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
-    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
-        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
-    }
-#endif
-    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
-    /* copy Literals */
-    assert(seqStorePtr->maxNbLit <= 128 KB);
-    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    assert(literals + litLength <= litLimit);
-    if (litEnd <= litLimit_w) {
-        /* Common case we can use wildcopy.
-	 * First copy 16 bytes, because literals are likely short.
-	 */
-        assert(WILDCOPY_OVERLENGTH >= 16);
-        ZSTD_copy16(seqStorePtr->lit, literals);
-        if (litLength > 16) {
-            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
-        }
-    } else {
-        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
-    }
-    seqStorePtr->lit += litLength;
-
-    /* literal Length */
-    if (litLength>0xFFFF) {
-        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
-        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
-        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-    }
-    seqStorePtr->sequences[0].litLength = (U16)litLength;
-
-    /* match offset */
-    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
-
-    /* match Length */
-    assert(matchLength >= MINMATCH);
-    {   size_t const mlBase = matchLength - MINMATCH;
-        if (mlBase>0xFFFF) {
-            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
-            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
-            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-        }
-        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
-    }
-
-    seqStorePtr->sequences++;
-}
-
-/* ZSTD_updateRep() :
- * updates in-place @rep (array of repeat offsets)
- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
- */
-MEM_STATIC void
-ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
-{
-    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
-        rep[2] = rep[1];
-        rep[1] = rep[0];
-        rep[0] = STORED_OFFSET(offBase_minus1);
-    } else {   /* repcode */
-        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
-        if (repCode > 0) {  /* note : if repCode==0, no change */
-            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            rep[1] = rep[0];
-            rep[0] = currentOffset;
-        } else {   /* repCode == 0 */
-            /* nothing to do */
-        }
-    }
-}
-
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
-MEM_STATIC repcodes_t
-ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
-{
-    repcodes_t newReps;
-    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
-    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
-    return newReps;
-}
-
-
-/*-*************************************
-*  Match length counter
-***************************************/
-static unsigned ZSTD_NbCommonBytes (size_t val)
-{
-    if (MEM_isLittleEndian()) {
-        if (MEM_64bits()) {
-#       if defined(_MSC_VER) && defined(_WIN64)
-#           if STATIC_BMI2
-                return _tzcnt_u64(val) >> 3;
-#           else
-                if (val != 0) {
-                    unsigned long r;
-                    _BitScanForward64(&r, (U64)val);
-                    return (unsigned)(r >> 3);
-                } else {
-                    /* Should not reach this code path */
-                    __assume(0);
-                }
-#           endif
-#       elif defined(__GNUC__) && (__GNUC__ >= 4)
-            return (__builtin_ctzll((U64)val) >> 3);
-#       else
-            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
-                                                     0, 3, 1, 3, 1, 4, 2, 7,
-                                                     0, 2, 3, 6, 1, 5, 3, 5,
-                                                     1, 3, 4, 4, 2, 5, 6, 7,
-                                                     7, 0, 1, 2, 3, 3, 4, 6,
-                                                     2, 6, 5, 5, 3, 4, 5, 6,
-                                                     7, 1, 2, 4, 6, 4, 4, 5,
-                                                     7, 2, 6, 5, 7, 6, 7, 7 };
-            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
-#       endif
-        } else { /* 32 bits */
-#       if defined(_MSC_VER)
-            if (val != 0) {
-                unsigned long r;
-                _BitScanForward(&r, (U32)val);
-                return (unsigned)(r >> 3);
-            } else {
-                /* Should not reach this code path */
-                __assume(0);
-            }
-#       elif defined(__GNUC__) && (__GNUC__ >= 3)
-            return (__builtin_ctz((U32)val) >> 3);
-#       else
-            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
-                                                     3, 2, 2, 1, 3, 2, 0, 1,
-                                                     3, 3, 1, 2, 2, 2, 2, 0,
-                                                     3, 1, 2, 0, 1, 0, 1, 1 };
-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-#       endif
-        }
-    } else {  /* Big Endian CPU */
-        if (MEM_64bits()) {
-#       if defined(_MSC_VER) && defined(_WIN64)
-#           if STATIC_BMI2
-			    return _lzcnt_u64(val) >> 3;
-#           else
-                if (val != 0) {
-                    unsigned long r;
-                    _BitScanReverse64(&r, (U64)val);
-                    return (unsigned)(r >> 3);
-                } else {
-                    /* Should not reach this code path */
-                    __assume(0);
-                }
-#           endif
-#       elif defined(__GNUC__) && (__GNUC__ >= 4)
-            return (__builtin_clzll(val) >> 3);
-#       else
-            unsigned r;
-            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
-            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
-            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-            r += (!val);
-            return r;
-#       endif
-        } else { /* 32 bits */
-#       if defined(_MSC_VER)
-            if (val != 0) {
-                unsigned long r;
-                _BitScanReverse(&r, (unsigned long)val);
-                return (unsigned)(r >> 3);
-            } else {
-                /* Should not reach this code path */
-                __assume(0);
-            }
-#       elif defined(__GNUC__) && (__GNUC__ >= 3)
-            return (__builtin_clz((U32)val) >> 3);
-#       else
-            unsigned r;
-            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-            r += (!val);
-            return r;
-#       endif
-    }   }
-}
-
-
-MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
-{
-    const BYTE* const pStart = pIn;
-    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
-
-    if (pIn < pInLoopLimit) {
-        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
-          if (diff) return ZSTD_NbCommonBytes(diff); }
-        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
-        while (pIn < pInLoopLimit) {
-            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
-            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
-            pIn += ZSTD_NbCommonBytes(diff);
-            return (size_t)(pIn - pStart);
-    }   }
-    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
-    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
-    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
-    return (size_t)(pIn - pStart);
-}
-
-/** ZSTD_count_2segments() :
- *  can count match length with `ip` & `match` in 2 different segments.
- *  convention : on reaching mEnd, match count continue starting from iStart
- */
-MEM_STATIC size_t
-ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
-                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
-{
-    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
-    size_t const matchLength = ZSTD_count(ip, match, vEnd);
-    if (match + matchLength != mEnd) return matchLength;
-    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
-    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
-    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
-    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
-    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
-    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
-}
-
-
-/*-*************************************
- *  Hashes
- ***************************************/
-static const U32 prime3bytes = 506832829U;
-static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
-MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
-
-static const U32 prime4bytes = 2654435761U;
-static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
-static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
-
-static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
-
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-
-MEM_STATIC FORCE_INLINE_ATTR
-size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
-{
-    switch(mls)
-    {
-    default:
-    case 4: return ZSTD_hash4Ptr(p, hBits);
-    case 5: return ZSTD_hash5Ptr(p, hBits);
-    case 6: return ZSTD_hash6Ptr(p, hBits);
-    case 7: return ZSTD_hash7Ptr(p, hBits);
-    case 8: return ZSTD_hash8Ptr(p, hBits);
-    }
-}
-
-/** ZSTD_ipow() :
- * Return base^exponent.
- */
-static U64 ZSTD_ipow(U64 base, U64 exponent)
-{
-    U64 power = 1;
-    while (exponent) {
-      if (exponent & 1) power *= base;
-      exponent >>= 1;
-      base *= base;
-    }
-    return power;
-}
-
-#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
-
-/** ZSTD_rollingHash_append() :
- * Add the buffer to the hash value.
- */
-static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
-{
-    BYTE const* istart = (BYTE const*)buf;
-    size_t pos;
-    for (pos = 0; pos < size; ++pos) {
-        hash *= prime8bytes;
-        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
-    }
-    return hash;
-}
-
-/** ZSTD_rollingHash_compute() :
- * Compute the rolling hash value of the buffer.
- */
-MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
-{
-    return ZSTD_rollingHash_append(0, buf, size);
-}
-
-/** ZSTD_rollingHash_primePower() :
- * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
- * over a window of length bytes.
- */
-MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
-{
-    return ZSTD_ipow(prime8bytes, length - 1);
-}
-
-/** ZSTD_rollingHash_rotate() :
- * Rotate the rolling hash by one byte.
- */
-MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
-{
-    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
-    hash *= prime8bytes;
-    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
-    return hash;
-}
-
-/*-*************************************
-*  Round buffer management
-***************************************/
-#if (ZSTD_WINDOWLOG_MAX_64 > 31)
-# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
-#endif
-/* Max current allowed */
-#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
-/* Maximum chunk size before overflow correction needs to be called again */
-#define ZSTD_CHUNKSIZE_MAX                                                     \
-    ( ((U32)-1)                  /* Maximum ending current index */            \
-    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
-
-/**
- * ZSTD_window_clear():
- * Clears the window containing the history by simply setting it to empty.
- */
-MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
-{
-    size_t const endT = (size_t)(window->nextSrc - window->base);
-    U32 const end = (U32)endT;
-
-    window->lowLimit = end;
-    window->dictLimit = end;
-}
-
-MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
-{
-    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
-           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
-           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
-}
-
-/**
- * ZSTD_window_hasExtDict():
- * Returns non-zero if the window has a non-empty extDict.
- */
-MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
-{
-    return window.lowLimit < window.dictLimit;
-}
-
-/**
- * ZSTD_matchState_dictMode():
- * Inspects the provided matchState and figures out what dictMode should be
- * passed to the compressor.
- */
-MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
-{
-    return ZSTD_window_hasExtDict(ms->window) ?
-        ZSTD_extDict :
-        ms->dictMatchState != NULL ?
-            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
-            ZSTD_noDict;
-}
-
-/* Defining this macro to non-zero tells zstd to run the overflow correction
- * code much more frequently. This is very inefficient, and should only be
- * used for tests and fuzzers.
- */
-#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
-#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
-#  else
-#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
-#  endif
-#endif
-
-/**
- * ZSTD_window_canOverflowCorrect():
- * Returns non-zero if the indices are large enough for overflow correction
- * to work correctly without impacting compression ratio.
- */
-MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
-                                              U32 cycleLog,
-                                              U32 maxDist,
-                                              U32 loadedDictEnd,
-                                              void const* src)
-{
-    U32 const cycleSize = 1u << cycleLog;
-    U32 const curr = (U32)((BYTE const*)src - window.base);
-    U32 const minIndexToOverflowCorrect = cycleSize
-                                        + MAX(maxDist, cycleSize)
-                                        + ZSTD_WINDOW_START_INDEX;
-
-    /* Adjust the min index to backoff the overflow correction frequency,
-     * so we don't waste too much CPU in overflow correction. If this
-     * computation overflows we don't really care, we just need to make
-     * sure it is at least minIndexToOverflowCorrect.
-     */
-    U32 const adjustment = window.nbOverflowCorrections + 1;
-    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
-                                  minIndexToOverflowCorrect);
-    U32 const indexLargeEnough = curr > adjustedIndex;
-
-    /* Only overflow correct early if the dictionary is invalidated already,
-     * so we don't hurt compression ratio.
-     */
-    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
-
-    return indexLargeEnough && dictionaryInvalidated;
-}
-
-/**
- * ZSTD_window_needOverflowCorrection():
- * Returns non-zero if the indices are getting too large and need overflow
- * protection.
- */
-MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
-                                                  U32 cycleLog,
-                                                  U32 maxDist,
-                                                  U32 loadedDictEnd,
-                                                  void const* src,
-                                                  void const* srcEnd)
-{
-    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
-    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
-        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
-            return 1;
-        }
-    }
-    return curr > ZSTD_CURRENT_MAX;
-}
-
-/**
- * ZSTD_window_correctOverflow():
- * Reduces the indices to protect from index overflow.
- * Returns the correction made to the indices, which must be applied to every
- * stored index.
- *
- * The least significant cycleLog bits of the indices must remain the same,
- * which may be 0. Every index up to maxDist in the past must be valid.
- */
-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
-                                           U32 maxDist, void const* src)
-{
-    /* preemptive overflow correction:
-     * 1. correction is large enough:
-     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
-     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
-     *
-     *    current - newCurrent
-     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
-     *    > (3<<29) - (1<<chainLog)
-     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
-     *    > 1<<29
-     *
-     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
-     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
-     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
-     *    In 32-bit mode we are safe, because (chainLog <= 29), so
-     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
-     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
-     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
-     */
-    U32 const cycleSize = 1u << cycleLog;
-    U32 const cycleMask = cycleSize - 1;
-    U32 const curr = (U32)((BYTE const*)src - window->base);
-    U32 const currentCycle = curr & cycleMask;
-    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
-    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
-                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
-                                     : 0;
-    U32 const newCurrent = currentCycle
-                         + currentCycleCorrection
-                         + MAX(maxDist, cycleSize);
-    U32 const correction = curr - newCurrent;
-    /* maxDist must be a power of two so that:
-     *   (newCurrent & cycleMask) == (curr & cycleMask)
-     * This is required to not corrupt the chains / binary tree.
-     */
-    assert((maxDist & (maxDist - 1)) == 0);
-    assert((curr & cycleMask) == (newCurrent & cycleMask));
-    assert(curr > newCurrent);
-    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
-        /* Loose bound, should be around 1<<29 (see above) */
-        assert(correction > 1<<28);
-    }
-
-    window->base += correction;
-    window->dictBase += correction;
-    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
-        window->lowLimit = ZSTD_WINDOW_START_INDEX;
-    } else {
-        window->lowLimit -= correction;
-    }
-    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
-        window->dictLimit = ZSTD_WINDOW_START_INDEX;
-    } else {
-        window->dictLimit -= correction;
-    }
-
-    /* Ensure we can still reference the full window. */
-    assert(newCurrent >= maxDist);
-    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
-    /* Ensure that lowLimit and dictLimit didn't underflow. */
-    assert(window->lowLimit <= newCurrent);
-    assert(window->dictLimit <= newCurrent);
-
-    ++window->nbOverflowCorrections;
-
-    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
-             window->lowLimit);
-    return correction;
-}
-
-/**
- * ZSTD_window_enforceMaxDist():
- * Updates lowLimit so that:
- *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
- *
- * It ensures index is valid as long as index >= lowLimit.
- * This must be called before a block compression call.
- *
- * loadedDictEnd is only defined if a dictionary is in use for current compression.
- * As the name implies, loadedDictEnd represents the index at end of dictionary.
- * The value lies within context's referential, it can be directly compared to blockEndIdx.
- *
- * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
- * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
- * This is because dictionaries are allowed to be referenced fully
- * as long as the last byte of the dictionary is in the window.
- * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
- *
- * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
- * In dictMatchState mode, lowLimit and dictLimit are the same,
- * and the dictionary is below them.
- * forceWindow and dictMatchState are therefore incompatible.
- */
-MEM_STATIC void
-ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                     const void* blockEnd,
-                           U32   maxDist,
-                           U32*  loadedDictEndPtr,
-                     const ZSTD_matchState_t** dictMatchStatePtr)
-{
-    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
-    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
-                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
-
-    /* - When there is no dictionary : loadedDictEnd == 0.
-         In which case, the test (blockEndIdx > maxDist) is merely to avoid
-         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
-       - When there is a standard dictionary :
-         Index referential is copied from the dictionary,
-         which means it starts from 0.
-         In which case, loadedDictEnd == dictSize,
-         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
-         since `blockEndIdx` also starts from zero.
-       - When there is an attached dictionary :
-         loadedDictEnd is expressed within the referential of the context,
-         so it can be directly compared against blockEndIdx.
-    */
-    if (blockEndIdx > maxDist + loadedDictEnd) {
-        U32 const newLowLimit = blockEndIdx - maxDist;
-        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
-        if (window->dictLimit < window->lowLimit) {
-            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
-                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
-            window->dictLimit = window->lowLimit;
-        }
-        /* On reaching window size, dictionaries are invalidated */
-        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
-        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
-    }
-}
-
-/* Similar to ZSTD_window_enforceMaxDist(),
- * but only invalidates dictionary
- * when input progresses beyond window size.
- * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
- *              loadedDictEnd uses same referential as window->base
- *              maxDist is the window size */
-MEM_STATIC void
-ZSTD_checkDictValidity(const ZSTD_window_t* window,
-                       const void* blockEnd,
-                             U32   maxDist,
-                             U32*  loadedDictEndPtr,
-                       const ZSTD_matchState_t** dictMatchStatePtr)
-{
-    assert(loadedDictEndPtr != NULL);
-    assert(dictMatchStatePtr != NULL);
-    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
-        U32 const loadedDictEnd = *loadedDictEndPtr;
-        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
-                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
-        assert(blockEndIdx >= loadedDictEnd);
-
-        if (blockEndIdx > loadedDictEnd + maxDist) {
-            /* On reaching window size, dictionaries are invalidated.
-             * For simplification, if window size is reached anywhere within next block,
-             * the dictionary is invalidated for the full block.
-             */
-            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
-            *loadedDictEndPtr = 0;
-            *dictMatchStatePtr = NULL;
-        } else {
-            if (*loadedDictEndPtr != 0) {
-                DEBUGLOG(6, "dictionary considered valid for current block");
-    }   }   }
-}
-
-MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
-    ZSTD_memset(window, 0, sizeof(*window));
-    window->base = (BYTE const*)" ";
-    window->dictBase = (BYTE const*)" ";
-    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
-    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
-    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
-    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
-    window->nbOverflowCorrections = 0;
-}
-
-/**
- * ZSTD_window_update():
- * Updates the window by appending [src, src + srcSize) to the window.
- * If it is not contiguous, the current prefix becomes the extDict, and we
- * forget about the extDict. Handles overlap of the prefix and extDict.
- * Returns non-zero if the segment is contiguous.
- */
-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-                                  void const* src, size_t srcSize,
-                                  int forceNonContiguous)
-{
-    BYTE const* const ip = (BYTE const*)src;
-    U32 contiguous = 1;
-    DEBUGLOG(5, "ZSTD_window_update");
-    if (srcSize == 0)
-        return contiguous;
-    assert(window->base != NULL);
-    assert(window->dictBase != NULL);
-    /* Check if blocks follow each other */
-    if (src != window->nextSrc || forceNonContiguous) {
-        /* not contiguous */
-        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
-        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
-        window->lowLimit = window->dictLimit;
-        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
-        window->dictLimit = (U32)distanceFromBase;
-        window->dictBase = window->base;
-        window->base = ip - distanceFromBase;
-        /* ms->nextToUpdate = window->dictLimit; */
-        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
-        contiguous = 0;
-    }
-    window->nextSrc = ip + srcSize;
-    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
-    if ( (ip+srcSize > window->dictBase + window->lowLimit)
-       & (ip < window->dictBase + window->dictLimit)) {
-        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
-        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
-        window->lowLimit = lowLimitMax;
-        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
-    }
-    return contiguous;
-}
-
-/**
- * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
- */
-MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
-{
-    U32 const maxDistance = 1U << windowLog;
-    U32 const lowestValid = ms->window.lowLimit;
-    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    U32 const isDictionary = (ms->loadedDictEnd != 0);
-    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
-     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
-     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
-     */
-    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
-    return matchLowest;
-}
-
-/**
- * Returns the lowest allowed match index in the prefix.
- */
-MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
-{
-    U32    const maxDistance = 1U << windowLog;
-    U32    const lowestValid = ms->window.dictLimit;
-    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    U32    const isDictionary = (ms->loadedDictEnd != 0);
-    /* When computing the lowest prefix index we need to take the dictionary into account to handle
-     * the edge case where the dictionary and the source are contiguous in memory.
-     */
-    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
-    return matchLowest;
-}
-
-
-
-/* debug functions */
-#if (DEBUGLEVEL>=2)
-
-MEM_STATIC double ZSTD_fWeight(U32 rawStat)
-{
-    U32 const fp_accuracy = 8;
-    U32 const fp_multiplier = (1 << fp_accuracy);
-    U32 const newStat = rawStat + 1;
-    U32 const hb = ZSTD_highbit32(newStat);
-    U32 const BWeight = hb * fp_multiplier;
-    U32 const FWeight = (newStat << fp_accuracy) >> hb;
-    U32 const weight = BWeight + FWeight;
-    assert(hb + fp_accuracy < 31);
-    return (double)weight / fp_multiplier;
-}
-
-/* display a table content,
- * listing each element, its frequency, and its predicted bit cost */
-MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
-{
-    unsigned u, sum;
-    for (u=0, sum=0; u<=max; u++) sum += table[u];
-    DEBUGLOG(2, "total nb elts: %u", sum);
-    for (u=0; u<=max; u++) {
-        DEBUGLOG(2, "%2u: %5u  (%.2f)",
-                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
-    }
-}
-
-#endif
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-/* ===============================================================
- * Shared internal declarations
- * These prototypes may be called from sources not in lib/compress
- * =============================================================== */
-
-/* ZSTD_loadCEntropy() :
- * dict : must point at beginning of a valid zstd dictionary.
- * return : size of dictionary header (size of magic number + dict ID + entropy tables)
- * assumptions : magic number supposed already checked
- *               and dictSize >= 8 */
-size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
-                         const void* const dict, size_t dictSize);
-
-void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
-
-/* ==============================================================
- * Private declarations
- * These prototypes shall only be called from within lib/compress
- * ============================================================== */
-
-/* ZSTD_getCParamsFromCCtxParams() :
- * cParams are built depending on compressionLevel, src size hints,
- * LDM and manually set compression parameters.
- * Note: srcSizeHint == 0 means 0!
- */
-ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
-
-/*! ZSTD_initCStream_internal() :
- *  Private use only. Init streaming operation.
- *  expects params to be valid.
- *  must receive dict, or cdict, or none, but not both.
- *  @return : 0, or an error code */
-size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
-                     const void* dict, size_t dictSize,
-                     const ZSTD_CDict* cdict,
-                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
-
-void ZSTD_resetSeqStore(seqStore_t* ssPtr);
-
-/*! ZSTD_getCParamsFromCDict() :
- *  as the name implies */
-ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
-
-/* ZSTD_compressBegin_advanced_internal() :
- * Private use only. To be called from zstdmt_compress.c. */
-size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
-                                    const void* dict, size_t dictSize,
-                                    ZSTD_dictContentType_e dictContentType,
-                                    ZSTD_dictTableLoadMethod_e dtlm,
-                                    const ZSTD_CDict* cdict,
-                                    const ZSTD_CCtx_params* params,
-                                    unsigned long long pledgedSrcSize);
-
-/* ZSTD_compress_advanced_internal() :
- * Private use only. To be called from zstdmt_compress.c. */
-size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
-                                       void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize,
-                                 const void* dict,size_t dictSize,
-                                 const ZSTD_CCtx_params* params);
-
-
-/* ZSTD_writeLastEmptyBlock() :
- * output an empty Block with end-of-frame mark to complete a frame
- * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
- *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
- */
-size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
-
-
-/* ZSTD_referenceExternalSequences() :
- * Must be called before starting a compression operation.
- * seqs must parse a prefix of the source.
- * This cannot be used when long range matching is enabled.
- * Zstd will use these sequences, and pass the literals to a secondary block
- * compressor.
- * @return : An error code on failure.
- * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
- * access and data corruption.
- */
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
-
-/** ZSTD_cycleLog() :
- *  condition for correct operation : hashLog > 1 */
-U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
-
-/** ZSTD_CCtx_trace() :
- *  Trace the end of a compression call.
- */
-void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
-
-#endif /* ZSTD_COMPRESS_H */
diff --git a/dep/zstd/lib/compress/zstd_compress_literals.c b/dep/zstd/lib/compress/zstd_compress_literals.c
deleted file mode 100644
index 52b0a8059..000000000
--- a/dep/zstd/lib/compress/zstd_compress_literals.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
- /*-*************************************
- *  Dependencies
- ***************************************/
-#include "zstd_compress_literals.h"
-
-size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
-    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
-
-    switch(flSize)
-    {
-        case 1: /* 2 - 1 - 5 */
-            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
-            break;
-        case 2: /* 2 - 2 - 12 */
-            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
-            break;
-        case 3: /* 2 - 2 - 20 */
-            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
-            break;
-        default:   /* not necessary : flSize is {1,2,3} */
-            assert(0);
-    }
-
-    ZSTD_memcpy(ostart + flSize, src, srcSize);
-    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
-    return srcSize + flSize;
-}
-
-size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
-    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
-
-    switch(flSize)
-    {
-        case 1: /* 2 - 1 - 5 */
-            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
-            break;
-        case 2: /* 2 - 2 - 12 */
-            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
-            break;
-        case 3: /* 2 - 2 - 20 */
-            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
-            break;
-        default:   /* not necessary : flSize is {1,2,3} */
-            assert(0);
-    }
-
-    ostart[flSize] = *(const BYTE*)src;
-    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
-    return flSize+1;
-}
-
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-                              ZSTD_hufCTables_t* nextHuf,
-                              ZSTD_strategy strategy, int disableLiteralCompression,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize,
-                              void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2,
-                        unsigned suspectUncompressible)
-{
-    size_t const minGain = ZSTD_minGain(srcSize, strategy);
-    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-    BYTE*  const ostart = (BYTE*)dst;
-    U32 singleStream = srcSize < 256;
-    symbolEncodingType_e hType = set_compressed;
-    size_t cLitSize;
-
-    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
-                disableLiteralCompression, (U32)srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralCompression)
-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
-
-    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
-    {   HUF_repeat repeat = prevHuf->repeatMode;
-        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
-        cLitSize = singleStream ?
-            HUF_compress1X_repeat(
-                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
-                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
-            HUF_compress4X_repeat(
-                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
-                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
-        if (repeat != HUF_repeat_none) {
-            /* reused the existing table */
-            DEBUGLOG(5, "Reusing previous huffman table");
-            hType = set_repeat;
-        }
-    }
-
-    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
-        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
-    if (cLitSize==1) {
-        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-    }
-
-    if (hType == set_compressed) {
-        /* using a newly constructed table */
-        nextHuf->repeatMode = HUF_repeat_check;
-    }
-
-    /* Build header */
-    switch(lhSize)
-    {
-    case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
-            MEM_writeLE24(ostart, lhc);
-            break;
-        }
-    case 4: /* 2 - 2 - 14 - 14 */
-        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
-            MEM_writeLE32(ostart, lhc);
-            break;
-        }
-    case 5: /* 2 - 2 - 18 - 18 */
-        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
-            MEM_writeLE32(ostart, lhc);
-            ostart[4] = (BYTE)(cLitSize >> 10);
-            break;
-        }
-    default:  /* not possible : lhSize is {3,4,5} */
-        assert(0);
-    }
-    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
-    return lhSize+cLitSize;
-}
diff --git a/dep/zstd/lib/compress/zstd_compress_literals.h b/dep/zstd/lib/compress/zstd_compress_literals.h
deleted file mode 100644
index 9775fb97c..000000000
--- a/dep/zstd/lib/compress/zstd_compress_literals.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_COMPRESS_LITERALS_H
-#define ZSTD_COMPRESS_LITERALS_H
-
-#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
-
-
-size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-                              ZSTD_hufCTables_t* nextHuf,
-                              ZSTD_strategy strategy, int disableLiteralCompression,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize,
-                              void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2,
-                        unsigned suspectUncompressible);
-
-#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/dep/zstd/lib/compress/zstd_compress_sequences.c b/dep/zstd/lib/compress/zstd_compress_sequences.c
deleted file mode 100644
index f1e40af2e..000000000
--- a/dep/zstd/lib/compress/zstd_compress_sequences.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
- /*-*************************************
- *  Dependencies
- ***************************************/
-#include "zstd_compress_sequences.h"
-
-/**
- * -log2(x / 256) lookup table for x in [0, 256).
- * If x == 0: Return 0
- * Else: Return floor(-log2(x / 256) * 256)
- */
-static unsigned const kInverseProbabilityLog256[256] = {
-    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
-    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
-    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
-    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
-    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
-    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
-    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
-    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
-    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
-    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
-    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
-    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
-    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
-    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
-    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
-    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
-    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
-    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
-    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
-    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
-    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
-    5,    4,    2,    1,
-};
-
-static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
-  void const* ptr = ctable;
-  U16 const* u16ptr = (U16 const*)ptr;
-  U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
-  return maxSymbolValue;
-}
-
-/**
- * Returns true if we should use ncount=-1 else we should
- * use ncount=1 for low probability symbols instead.
- */
-static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
-{
-    /* Heuristic: This should cover most blocks <= 16K and
-     * start to fade out after 16K to about 32K depending on
-     * comprssibility.
-     */
-    return nbSeq >= 2048;
-}
-
-/**
- * Returns the cost in bytes of encoding the normalized count header.
- * Returns an error if any of the helper functions return an error.
- */
-static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
-                              size_t const nbSeq, unsigned const FSELog)
-{
-    BYTE wksp[FSE_NCOUNTBOUND];
-    S16 norm[MaxSeq + 1];
-    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
-    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
-    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
-}
-
-/**
- * Returns the cost in bits of encoding the distribution described by count
- * using the entropy bound.
- */
-static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
-{
-    unsigned cost = 0;
-    unsigned s;
-
-    assert(total > 0);
-    for (s = 0; s <= max; ++s) {
-        unsigned norm = (unsigned)((256 * count[s]) / total);
-        if (count[s] != 0 && norm == 0)
-            norm = 1;
-        assert(count[s] < total);
-        cost += count[s] * kInverseProbabilityLog256[norm];
-    }
-    return cost >> 8;
-}
-
-/**
- * Returns the cost in bits of encoding the distribution in count using ctable.
- * Returns an error if ctable cannot represent all the symbols in count.
- */
-size_t ZSTD_fseBitCost(
-    FSE_CTable const* ctable,
-    unsigned const* count,
-    unsigned const max)
-{
-    unsigned const kAccuracyLog = 8;
-    size_t cost = 0;
-    unsigned s;
-    FSE_CState_t cstate;
-    FSE_initCState(&cstate, ctable);
-    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
-        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
-                    ZSTD_getFSEMaxSymbolValue(ctable), max);
-        return ERROR(GENERIC);
-    }
-    for (s = 0; s <= max; ++s) {
-        unsigned const tableLog = cstate.stateLog;
-        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
-        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
-        if (count[s] == 0)
-            continue;
-        if (bitCost >= badCost) {
-            DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
-            return ERROR(GENERIC);
-        }
-        cost += (size_t)count[s] * bitCost;
-    }
-    return cost >> kAccuracyLog;
-}
-
-/**
- * Returns the cost in bits of encoding the distribution in count using the
- * table described by norm. The max symbol support by norm is assumed >= max.
- * norm must be valid for every symbol with non-zero probability in count.
- */
-size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
-                             unsigned const* count, unsigned const max)
-{
-    unsigned const shift = 8 - accuracyLog;
-    size_t cost = 0;
-    unsigned s;
-    assert(accuracyLog <= 8);
-    for (s = 0; s <= max; ++s) {
-        unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
-        unsigned const norm256 = normAcc << shift;
-        assert(norm256 > 0);
-        assert(norm256 < 256);
-        cost += count[s] * kInverseProbabilityLog256[norm256];
-    }
-    return cost >> 8;
-}
-
-symbolEncodingType_e
-ZSTD_selectEncodingType(
-        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
-        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
-        FSE_CTable const* prevCTable,
-        short const* defaultNorm, U32 defaultNormLog,
-        ZSTD_defaultPolicy_e const isDefaultAllowed,
-        ZSTD_strategy const strategy)
-{
-    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
-    if (mostFrequent == nbSeq) {
-        *repeatMode = FSE_repeat_none;
-        if (isDefaultAllowed && nbSeq <= 2) {
-            /* Prefer set_basic over set_rle when there are 2 or less symbols,
-             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
-             * If basic encoding isn't possible, always choose RLE.
-             */
-            DEBUGLOG(5, "Selected set_basic");
-            return set_basic;
-        }
-        DEBUGLOG(5, "Selected set_rle");
-        return set_rle;
-    }
-    if (strategy < ZSTD_lazy) {
-        if (isDefaultAllowed) {
-            size_t const staticFse_nbSeq_max = 1000;
-            size_t const mult = 10 - strategy;
-            size_t const baseLog = 3;
-            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
-            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
-            assert(mult <= 9 && mult >= 7);
-            if ( (*repeatMode == FSE_repeat_valid)
-              && (nbSeq < staticFse_nbSeq_max) ) {
-                DEBUGLOG(5, "Selected set_repeat");
-                return set_repeat;
-            }
-            if ( (nbSeq < dynamicFse_nbSeq_min)
-              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
-                DEBUGLOG(5, "Selected set_basic");
-                /* The format allows default tables to be repeated, but it isn't useful.
-                 * When using simple heuristics to select encoding type, we don't want
-                 * to confuse these tables with dictionaries. When running more careful
-                 * analysis, we don't need to waste time checking both repeating tables
-                 * and default tables.
-                 */
-                *repeatMode = FSE_repeat_none;
-                return set_basic;
-            }
-        }
-    } else {
-        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
-        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
-        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
-        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
-
-        if (isDefaultAllowed) {
-            assert(!ZSTD_isError(basicCost));
-            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
-        }
-        assert(!ZSTD_isError(NCountCost));
-        assert(compressedCost < ERROR(maxCode));
-        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
-                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
-        if (basicCost <= repeatCost && basicCost <= compressedCost) {
-            DEBUGLOG(5, "Selected set_basic");
-            assert(isDefaultAllowed);
-            *repeatMode = FSE_repeat_none;
-            return set_basic;
-        }
-        if (repeatCost <= compressedCost) {
-            DEBUGLOG(5, "Selected set_repeat");
-            assert(!ZSTD_isError(repeatCost));
-            return set_repeat;
-        }
-        assert(compressedCost < basicCost && compressedCost < repeatCost);
-    }
-    DEBUGLOG(5, "Selected set_compressed");
-    *repeatMode = FSE_repeat_check;
-    return set_compressed;
-}
-
-typedef struct {
-    S16 norm[MaxSeq + 1];
-    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
-} ZSTD_BuildCTableWksp;
-
-size_t
-ZSTD_buildCTable(void* dst, size_t dstCapacity,
-                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
-                unsigned* count, U32 max,
-                const BYTE* codeTable, size_t nbSeq,
-                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* entropyWorkspace, size_t entropyWorkspaceSize)
-{
-    BYTE* op = (BYTE*)dst;
-    const BYTE* const oend = op + dstCapacity;
-    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
-
-    switch (type) {
-    case set_rle:
-        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
-        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
-        *op = codeTable[0];
-        return 1;
-    case set_repeat:
-        ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
-        return 0;
-    case set_basic:
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
-        return 0;
-    case set_compressed: {
-        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
-        size_t nbSeq_1 = nbSeq;
-        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
-        if (count[codeTable[nbSeq-1]] > 1) {
-            count[codeTable[nbSeq-1]]--;
-            nbSeq_1--;
-        }
-        assert(nbSeq_1 > 1);
-        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
-        (void)entropyWorkspaceSize;
-        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
-        assert(oend >= op);
-        {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
-            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
-            return NCountSize;
-        }
-    }
-    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
-    }
-}
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_encodeSequences_body(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    BIT_CStream_t blockStream;
-    FSE_CState_t  stateMatchLength;
-    FSE_CState_t  stateOffsetBits;
-    FSE_CState_t  stateLitLength;
-
-    RETURN_ERROR_IF(
-        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
-        dstSize_tooSmall, "not enough space remaining");
-    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
-                (int)(blockStream.endPtr - blockStream.startPtr),
-                (unsigned)dstCapacity);
-
-    /* first symbols */
-    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
-    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
-    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
-    if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
-    if (MEM_32bits()) BIT_flushBits(&blockStream);
-    if (longOffsets) {
-        U32 const ofBits = ofCodeTable[nbSeq-1];
-        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-        if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
-            BIT_flushBits(&blockStream);
-        }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
-                    ofBits - extraBits);
-    } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
-    }
-    BIT_flushBits(&blockStream);
-
-    {   size_t n;
-        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
-            BYTE const llCode = llCodeTable[n];
-            BYTE const ofCode = ofCodeTable[n];
-            BYTE const mlCode = mlCodeTable[n];
-            U32  const llBits = LL_bits[llCode];
-            U32  const ofBits = ofCode;
-            U32  const mlBits = ML_bits[mlCode];
-            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
-                        (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].mlBase + MINMATCH,
-                        (unsigned)sequences[n].offBase);
-                                                                            /* 32b*/  /* 64b*/
-                                                                            /* (7)*/  /* (7)*/
-            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
-            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
-            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
-            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
-                BIT_flushBits(&blockStream);                                /* (7)*/
-            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
-            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
-            if (longOffsets) {
-                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-                if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
-                    BIT_flushBits(&blockStream);                            /* (7)*/
-                }
-                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
-                            ofBits - extraBits);                            /* 31 */
-            } else {
-                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);     /* 31 */
-            }
-            BIT_flushBits(&blockStream);                                    /* (7)*/
-            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
-    }   }
-
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
-    FSE_flushCState(&blockStream, &stateMatchLength);
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
-    FSE_flushCState(&blockStream, &stateOffsetBits);
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
-    FSE_flushCState(&blockStream, &stateLitLength);
-
-    {   size_t const streamSize = BIT_closeCStream(&blockStream);
-        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
-        return streamSize;
-    }
-}
-
-static size_t
-ZSTD_encodeSequences_default(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    return ZSTD_encodeSequences_body(dst, dstCapacity,
-                                    CTable_MatchLength, mlCodeTable,
-                                    CTable_OffsetBits, ofCodeTable,
-                                    CTable_LitLength, llCodeTable,
-                                    sequences, nbSeq, longOffsets);
-}
-
-
-#if DYNAMIC_BMI2
-
-static BMI2_TARGET_ATTRIBUTE size_t
-ZSTD_encodeSequences_bmi2(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    return ZSTD_encodeSequences_body(dst, dstCapacity,
-                                    CTable_MatchLength, mlCodeTable,
-                                    CTable_OffsetBits, ofCodeTable,
-                                    CTable_LitLength, llCodeTable,
-                                    sequences, nbSeq, longOffsets);
-}
-
-#endif
-
-size_t ZSTD_encodeSequences(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
-{
-    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
-                                         CTable_MatchLength, mlCodeTable,
-                                         CTable_OffsetBits, ofCodeTable,
-                                         CTable_LitLength, llCodeTable,
-                                         sequences, nbSeq, longOffsets);
-    }
-#endif
-    (void)bmi2;
-    return ZSTD_encodeSequences_default(dst, dstCapacity,
-                                        CTable_MatchLength, mlCodeTable,
-                                        CTable_OffsetBits, ofCodeTable,
-                                        CTable_LitLength, llCodeTable,
-                                        sequences, nbSeq, longOffsets);
-}
diff --git a/dep/zstd/lib/compress/zstd_compress_sequences.h b/dep/zstd/lib/compress/zstd_compress_sequences.h
deleted file mode 100644
index 7991364c2..000000000
--- a/dep/zstd/lib/compress/zstd_compress_sequences.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_COMPRESS_SEQUENCES_H
-#define ZSTD_COMPRESS_SEQUENCES_H
-
-#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
-#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
-
-typedef enum {
-    ZSTD_defaultDisallowed = 0,
-    ZSTD_defaultAllowed = 1
-} ZSTD_defaultPolicy_e;
-
-symbolEncodingType_e
-ZSTD_selectEncodingType(
-        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
-        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
-        FSE_CTable const* prevCTable,
-        short const* defaultNorm, U32 defaultNormLog,
-        ZSTD_defaultPolicy_e const isDefaultAllowed,
-        ZSTD_strategy const strategy);
-
-size_t
-ZSTD_buildCTable(void* dst, size_t dstCapacity,
-                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
-                unsigned* count, U32 max,
-                const BYTE* codeTable, size_t nbSeq,
-                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* entropyWorkspace, size_t entropyWorkspaceSize);
-
-size_t ZSTD_encodeSequences(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
-
-size_t ZSTD_fseBitCost(
-    FSE_CTable const* ctable,
-    unsigned const* count,
-    unsigned const max);
-
-size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
-                             unsigned const* count, unsigned const max);
-#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/dep/zstd/lib/compress/zstd_compress_superblock.c b/dep/zstd/lib/compress/zstd_compress_superblock.c
deleted file mode 100644
index 10e337857..000000000
--- a/dep/zstd/lib/compress/zstd_compress_superblock.c
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
- /*-*************************************
- *  Dependencies
- ***************************************/
-#include "zstd_compress_superblock.h"
-
-#include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
-#include "hist.h"                     /* HIST_countFast_wksp */
-#include "zstd_compress_internal.h"   /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
-#include "zstd_compress_sequences.h"
-#include "zstd_compress_literals.h"
-
-/** ZSTD_compressSubBlock_literal() :
- *  Compresses literals section for a sub-block.
- *  When we have to write the Huffman table we will sometimes choose a header
- *  size larger than necessary. This is because we have to pick the header size
- *  before we know the table size + compressed size, so we have a bound on the
- *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
- *
- *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
- *  in writing the header, otherwise it is set to 0.
- *
- *  hufMetadata->hType has literals block type info.
- *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
- *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
- *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
- *      If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
- *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
- *  @return : compressed size of literals section of a sub-block
- *            Or 0 if it unable to compress.
- *            Or error code */
-static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
-                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                    const BYTE* literals, size_t litSize,
-                                    void* dst, size_t dstSize,
-                                    const int bmi2, int writeEntropy, int* entropyWritten)
-{
-    size_t const header = writeEntropy ? 200 : 0;
-    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart + lhSize;
-    U32 const singleStream = lhSize == 3;
-    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
-    size_t cLitSize = 0;
-
-    (void)bmi2; /* TODO bmi2... */
-
-    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
-
-    *entropyWritten = 0;
-    if (litSize == 0 || hufMetadata->hType == set_basic) {
-      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
-      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
-    } else if (hufMetadata->hType == set_rle) {
-      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
-      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
-    }
-
-    assert(litSize > 0);
-    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
-
-    if (writeEntropy && hufMetadata->hType == set_compressed) {
-        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
-        op += hufMetadata->hufDesSize;
-        cLitSize += hufMetadata->hufDesSize;
-        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
-    }
-
-    /* TODO bmi2 */
-    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
-                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
-        op += cSize;
-        cLitSize += cSize;
-        if (cSize == 0 || ERR_isError(cSize)) {
-            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
-            return 0;
-        }
-        /* If we expand and we aren't writing a header then emit uncompressed */
-        if (!writeEntropy && cLitSize >= litSize) {
-            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
-            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
-        }
-        /* If we are writing headers then allow expansion that doesn't change our header size. */
-        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
-            assert(cLitSize > litSize);
-            DEBUGLOG(5, "Literals expanded beyond allowed header size");
-            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
-        }
-        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
-    }
-
-    /* Build header */
-    switch(lhSize)
-    {
-    case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
-            MEM_writeLE24(ostart, lhc);
-            break;
-        }
-    case 4: /* 2 - 2 - 14 - 14 */
-        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
-            MEM_writeLE32(ostart, lhc);
-            break;
-        }
-    case 5: /* 2 - 2 - 18 - 18 */
-        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
-            MEM_writeLE32(ostart, lhc);
-            ostart[4] = (BYTE)(cLitSize >> 10);
-            break;
-        }
-    default:  /* not possible : lhSize is {3,4,5} */
-        assert(0);
-    }
-    *entropyWritten = 1;
-    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
-    return op-ostart;
-}
-
-static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
-    const seqDef* const sstart = sequences;
-    const seqDef* const send = sequences + nbSeq;
-    const seqDef* sp = sstart;
-    size_t matchLengthSum = 0;
-    size_t litLengthSum = 0;
-    (void)(litLengthSum); /* suppress unused variable warning on some environments */
-    while (send-sp > 0) {
-        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
-        litLengthSum += seqLen.litLength;
-        matchLengthSum += seqLen.matchLength;
-        sp++;
-    }
-    assert(litLengthSum <= litSize);
-    if (!lastSequence) {
-        assert(litLengthSum == litSize);
-    }
-    return matchLengthSum + litSize;
-}
-
-/** ZSTD_compressSubBlock_sequences() :
- *  Compresses sequences section for a sub-block.
- *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
- *  symbol compression modes for the super-block.
- *  The first successfully compressed block will have these in its header.
- *  We set entropyWritten=1 when we succeed in compressing the sequences.
- *  The following sub-blocks will always have repeat mode.
- *  @return : compressed size of sequences section of a sub-block
- *            Or 0 if it is unable to compress
- *            Or error code. */
-static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
-                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                              const seqDef* sequences, size_t nbSeq,
-                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                              void* dst, size_t dstCapacity,
-                                              const int bmi2, int writeEntropy, int* entropyWritten)
-{
-    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstCapacity;
-    BYTE* op = ostart;
-    BYTE* seqHead;
-
-    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
-
-    *entropyWritten = 0;
-    /* Sequences Header */
-    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
-                    dstSize_tooSmall, "");
-    if (nbSeq < 0x7F)
-        *op++ = (BYTE)nbSeq;
-    else if (nbSeq < LONGNBSEQ)
-        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
-    else
-        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-    if (nbSeq==0) {
-        return op - ostart;
-    }
-
-    /* seqHead : flags for FSE encoding type */
-    seqHead = op++;
-
-    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
-
-    if (writeEntropy) {
-        const U32 LLtype = fseMetadata->llType;
-        const U32 Offtype = fseMetadata->ofType;
-        const U32 MLtype = fseMetadata->mlType;
-        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
-        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
-        op += fseMetadata->fseTablesSize;
-    } else {
-        const U32 repeat = set_repeat;
-        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
-    }
-
-    {   size_t const bitstreamSize = ZSTD_encodeSequences(
-                                        op, oend - op,
-                                        fseTables->matchlengthCTable, mlCode,
-                                        fseTables->offcodeCTable, ofCode,
-                                        fseTables->litlengthCTable, llCode,
-                                        sequences, nbSeq,
-                                        longOffsets, bmi2);
-        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
-        op += bitstreamSize;
-        /* zstd versions <= 1.3.4 mistakenly report corruption when
-         * FSE_readNCount() receives a buffer < 4 bytes.
-         * Fixed by https://github.com/facebook/zstd/pull/1146.
-         * This can happen when the last set_compressed table present is 2
-         * bytes and the bitstream is only one byte.
-         * In this exceedingly rare case, we will simply emit an uncompressed
-         * block, since it isn't worth optimizing.
-         */
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
-            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
-            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
-            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
-                        "emitting an uncompressed block.");
-            return 0;
-        }
-#endif
-        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
-    }
-
-    /* zstd versions <= 1.4.0 mistakenly report error when
-     * sequences section body size is less than 3 bytes.
-     * Fixed by https://github.com/facebook/zstd/pull/1664.
-     * This can happen when the previous sequences section block is compressed
-     * with rle mode and the current block's sequences section is compressed
-     * with repeat mode where sequences section body size can be 1 byte.
-     */
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    if (op-seqHead < 4) {
-        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
-                    "an uncompressed block when sequences are < 4 bytes");
-        return 0;
-    }
-#endif
-
-    *entropyWritten = 1;
-    return op - ostart;
-}
-
-/** ZSTD_compressSubBlock() :
- *  Compresses a single sub-block.
- *  @return : compressed size of the sub-block
- *            Or 0 if it failed to compress. */
-static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
-                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                    const seqDef* sequences, size_t nbSeq,
-                                    const BYTE* literals, size_t litSize,
-                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
-                                    const ZSTD_CCtx_params* cctxParams,
-                                    void* dst, size_t dstCapacity,
-                                    const int bmi2,
-                                    int writeLitEntropy, int writeSeqEntropy,
-                                    int* litEntropyWritten, int* seqEntropyWritten,
-                                    U32 lastBlock)
-{
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstCapacity;
-    BYTE* op = ostart + ZSTD_blockHeaderSize;
-    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
-                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
-    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
-                                                        &entropyMetadata->hufMetadata, literals, litSize,
-                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
-        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
-        if (cLitSize == 0) return 0;
-        op += cLitSize;
-    }
-    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
-                                                  &entropyMetadata->fseMetadata,
-                                                  sequences, nbSeq,
-                                                  llCode, mlCode, ofCode,
-                                                  cctxParams,
-                                                  op, oend-op,
-                                                  bmi2, writeSeqEntropy, seqEntropyWritten);
-        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
-        if (cSeqSize == 0) return 0;
-        op += cSeqSize;
-    }
-    /* Write block header */
-    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
-        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
-        MEM_writeLE24(ostart, cBlockHeader24);
-    }
-    return op-ostart;
-}
-
-static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
-                                                const ZSTD_hufCTables_t* huf,
-                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                void* workspace, size_t wkspSize,
-                                                int writeEntropy)
-{
-    unsigned* const countWksp = (unsigned*)workspace;
-    unsigned maxSymbolValue = 255;
-    size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
-
-    if (hufMetadata->hType == set_basic) return litSize;
-    else if (hufMetadata->hType == set_rle) return 1;
-    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
-        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
-        if (ZSTD_isError(largest)) return litSize;
-        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
-            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
-            return cLitSizeEstimate + literalSectionHeaderSize;
-    }   }
-    assert(0); /* impossible */
-    return 0;
-}
-
-static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
-                        const BYTE* codeTable, unsigned maxCode,
-                        size_t nbSeq, const FSE_CTable* fseCTable,
-                        const U8* additionalBits,
-                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                        void* workspace, size_t wkspSize)
-{
-    unsigned* const countWksp = (unsigned*)workspace;
-    const BYTE* ctp = codeTable;
-    const BYTE* const ctStart = ctp;
-    const BYTE* const ctEnd = ctStart + nbSeq;
-    size_t cSymbolTypeSizeEstimateInBits = 0;
-    unsigned max = maxCode;
-
-    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-    if (type == set_basic) {
-        /* We selected this encoding type, so it must be valid. */
-        assert(max <= defaultMax);
-        cSymbolTypeSizeEstimateInBits = max <= defaultMax
-                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
-                : ERROR(GENERIC);
-    } else if (type == set_rle) {
-        cSymbolTypeSizeEstimateInBits = 0;
-    } else if (type == set_compressed || type == set_repeat) {
-        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
-    }
-    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
-    while (ctp < ctEnd) {
-        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
-        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
-        ctp++;
-    }
-    return cSymbolTypeSizeEstimateInBits / 8;
-}
-
-static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
-                                                  const BYTE* llCodeTable,
-                                                  const BYTE* mlCodeTable,
-                                                  size_t nbSeq,
-                                                  const ZSTD_fseCTables_t* fseTables,
-                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                  void* workspace, size_t wkspSize,
-                                                  int writeEntropy)
-{
-    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
-    size_t cSeqSizeEstimate = 0;
-    if (nbSeq == 0) return sequencesSectionHeaderSize;
-    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
-                                         nbSeq, fseTables->offcodeCTable, NULL,
-                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                         workspace, wkspSize);
-    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
-                                         nbSeq, fseTables->litlengthCTable, LL_bits,
-                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                         workspace, wkspSize);
-    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
-                                         nbSeq, fseTables->matchlengthCTable, ML_bits,
-                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                         workspace, wkspSize);
-    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
-    return cSeqSizeEstimate + sequencesSectionHeaderSize;
-}
-
-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
-                                        const BYTE* ofCodeTable,
-                                        const BYTE* llCodeTable,
-                                        const BYTE* mlCodeTable,
-                                        size_t nbSeq,
-                                        const ZSTD_entropyCTables_t* entropy,
-                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                                        void* workspace, size_t wkspSize,
-                                        int writeLitEntropy, int writeSeqEntropy) {
-    size_t cSizeEstimate = 0;
-    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
-                                                         workspace, wkspSize, writeLitEntropy);
-    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
-                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
-                                                         workspace, wkspSize, writeSeqEntropy);
-    return cSizeEstimate + ZSTD_blockHeaderSize;
-}
-
-static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
-{
-    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
-        return 1;
-    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
-        return 1;
-    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
-        return 1;
-    return 0;
-}
-
-/** ZSTD_compressSubBlock_multi() :
- *  Breaks super-block into multiple sub-blocks and compresses them.
- *  Entropy will be written to the first block.
- *  The following blocks will use repeat mode to compress.
- *  All sub-blocks are compressed blocks (no raw or rle blocks).
- *  @return : compressed size of the super block (which is multiple ZSTD blocks)
- *            Or 0 if it failed to compress. */
-static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
-                            const ZSTD_compressedBlockState_t* prevCBlock,
-                            ZSTD_compressedBlockState_t* nextCBlock,
-                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                            const ZSTD_CCtx_params* cctxParams,
-                                  void* dst, size_t dstCapacity,
-                            const void* src, size_t srcSize,
-                            const int bmi2, U32 lastBlock,
-                            void* workspace, size_t wkspSize)
-{
-    const seqDef* const sstart = seqStorePtr->sequencesStart;
-    const seqDef* const send = seqStorePtr->sequences;
-    const seqDef* sp = sstart;
-    const BYTE* const lstart = seqStorePtr->litStart;
-    const BYTE* const lend = seqStorePtr->lit;
-    const BYTE* lp = lstart;
-    BYTE const* ip = (BYTE const*)src;
-    BYTE const* const iend = ip + srcSize;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + dstCapacity;
-    BYTE* op = ostart;
-    const BYTE* llCodePtr = seqStorePtr->llCode;
-    const BYTE* mlCodePtr = seqStorePtr->mlCode;
-    const BYTE* ofCodePtr = seqStorePtr->ofCode;
-    size_t targetCBlockSize = cctxParams->targetCBlockSize;
-    size_t litSize, seqCount;
-    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
-    int writeSeqEntropy = 1;
-    int lastSequence = 0;
-
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
-                (unsigned)(lend-lp), (unsigned)(send-sstart));
-
-    litSize = 0;
-    seqCount = 0;
-    do {
-        size_t cBlockSizeEstimate = 0;
-        if (sstart == send) {
-            lastSequence = 1;
-        } else {
-            const seqDef* const sequence = sp + seqCount;
-            lastSequence = sequence == send - 1;
-            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
-            seqCount++;
-        }
-        if (lastSequence) {
-            assert(lp <= lend);
-            assert(litSize <= (size_t)(lend - lp));
-            litSize = (size_t)(lend - lp);
-        }
-        /* I think there is an optimization opportunity here.
-         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
-         * since it recalculates estimate from scratch.
-         * For example, it would recount literal distribution and symbol codes every time.
-         */
-        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
-                                                       &nextCBlock->entropy, entropyMetadata,
-                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
-        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
-            int litEntropyWritten = 0;
-            int seqEntropyWritten = 0;
-            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
-            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-                                                       sp, seqCount,
-                                                       lp, litSize,
-                                                       llCodePtr, mlCodePtr, ofCodePtr,
-                                                       cctxParams,
-                                                       op, oend-op,
-                                                       bmi2, writeLitEntropy, writeSeqEntropy,
-                                                       &litEntropyWritten, &seqEntropyWritten,
-                                                       lastBlock && lastSequence);
-            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-            if (cSize > 0 && cSize < decompressedSize) {
-                DEBUGLOG(5, "Committed the sub-block");
-                assert(ip + decompressedSize <= iend);
-                ip += decompressedSize;
-                sp += seqCount;
-                lp += litSize;
-                op += cSize;
-                llCodePtr += seqCount;
-                mlCodePtr += seqCount;
-                ofCodePtr += seqCount;
-                litSize = 0;
-                seqCount = 0;
-                /* Entropy only needs to be written once */
-                if (litEntropyWritten) {
-                    writeLitEntropy = 0;
-                }
-                if (seqEntropyWritten) {
-                    writeSeqEntropy = 0;
-                }
-            }
-        }
-    } while (!lastSequence);
-    if (writeLitEntropy) {
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
-        ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
-    }
-    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
-        /* If we haven't written our entropy tables, then we've violated our contract and
-         * must emit an uncompressed block.
-         */
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
-        return 0;
-    }
-    if (ip < iend) {
-        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
-        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
-        assert(cSize != 0);
-        op += cSize;
-        /* We have to regenerate the repcodes because we've skipped some sequences */
-        if (sp < send) {
-            seqDef const* seq;
-            repcodes_t rep;
-            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
-            for (seq = sstart; seq < sp; ++seq) {
-                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
-            }
-            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
-        }
-    }
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
-    return op-ostart;
-}
-
-size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
-                               void* dst, size_t dstCapacity,
-                               void const* src, size_t srcSize,
-                               unsigned lastBlock) {
-    ZSTD_entropyCTablesMetadata_t entropyMetadata;
-
-    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
-          &zc->blockState.prevCBlock->entropy,
-          &zc->blockState.nextCBlock->entropy,
-          &zc->appliedParams,
-          &entropyMetadata,
-          zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
-
-    return ZSTD_compressSubBlock_multi(&zc->seqStore,
-            zc->blockState.prevCBlock,
-            zc->blockState.nextCBlock,
-            &entropyMetadata,
-            &zc->appliedParams,
-            dst, dstCapacity,
-            src, srcSize,
-            zc->bmi2, lastBlock,
-            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
-}
diff --git a/dep/zstd/lib/compress/zstd_compress_superblock.h b/dep/zstd/lib/compress/zstd_compress_superblock.h
deleted file mode 100644
index 176f9b106..000000000
--- a/dep/zstd/lib/compress/zstd_compress_superblock.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_COMPRESS_ADVANCED_H
-#define ZSTD_COMPRESS_ADVANCED_H
-
-/*-*************************************
-*  Dependencies
-***************************************/
-
-#include "../zstd.h" /* ZSTD_CCtx */
-
-/*-*************************************
-*  Target Compressed Block Size
-***************************************/
-
-/* ZSTD_compressSuperBlock() :
- * Used to compress a super block when targetCBlockSize is being used.
- * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
-size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
-                               void* dst, size_t dstCapacity,
-                               void const* src, size_t srcSize,
-                               unsigned lastBlock);
-
-#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/dep/zstd/lib/compress/zstd_cwksp.h b/dep/zstd/lib/compress/zstd_cwksp.h
deleted file mode 100644
index dc3f40c80..000000000
--- a/dep/zstd/lib/compress/zstd_cwksp.h
+++ /dev/null
@@ -1,676 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_CWKSP_H
-#define ZSTD_CWKSP_H
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "../common/zstd_internal.h"
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*-*************************************
-*  Constants
-***************************************/
-
-/* Since the workspace is effectively its own little malloc implementation /
- * arena, when we run under ASAN, we should similarly insert redzones between
- * each internal element of the workspace, so ASAN will catch overruns that
- * reach outside an object but that stay inside the workspace.
- *
- * This defines the size of that redzone.
- */
-#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
-#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
-#endif
-
-
-/* Set our tables and aligneds to align by 64 bytes */
-#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
-
-/*-*************************************
-*  Structures
-***************************************/
-typedef enum {
-    ZSTD_cwksp_alloc_objects,
-    ZSTD_cwksp_alloc_buffers,
-    ZSTD_cwksp_alloc_aligned
-} ZSTD_cwksp_alloc_phase_e;
-
-/**
- * Used to describe whether the workspace is statically allocated (and will not
- * necessarily ever be freed), or if it's dynamically allocated and we can
- * expect a well-formed caller to free this.
- */
-typedef enum {
-    ZSTD_cwksp_dynamic_alloc,
-    ZSTD_cwksp_static_alloc
-} ZSTD_cwksp_static_alloc_e;
-
-/**
- * Zstd fits all its internal datastructures into a single continuous buffer,
- * so that it only needs to perform a single OS allocation (or so that a buffer
- * can be provided to it and it can perform no allocations at all). This buffer
- * is called the workspace.
- *
- * Several optimizations complicate that process of allocating memory ranges
- * from this workspace for each internal datastructure:
- *
- * - These different internal datastructures have different setup requirements:
- *
- *   - The static objects need to be cleared once and can then be trivially
- *     reused for each compression.
- *
- *   - Various buffers don't need to be initialized at all--they are always
- *     written into before they're read.
- *
- *   - The matchstate tables have a unique requirement that they don't need
- *     their memory to be totally cleared, but they do need the memory to have
- *     some bound, i.e., a guarantee that all values in the memory they've been
- *     allocated is less than some maximum value (which is the starting value
- *     for the indices that they will then use for compression). When this
- *     guarantee is provided to them, they can use the memory without any setup
- *     work. When it can't, they have to clear the area.
- *
- * - These buffers also have different alignment requirements.
- *
- * - We would like to reuse the objects in the workspace for multiple
- *   compressions without having to perform any expensive reallocation or
- *   reinitialization work.
- *
- * - We would like to be able to efficiently reuse the workspace across
- *   multiple compressions **even when the compression parameters change** and
- *   we need to resize some of the objects (where possible).
- *
- * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
- * abstraction was created. It works as follows:
- *
- * Workspace Layout:
- *
- * [                        ... workspace ...                         ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
- *
- * The various objects that live in the workspace are divided into the
- * following categories, and are allocated separately:
- *
- * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
- *   so that literally everything fits in a single buffer. Note: if present,
- *   this must be the first object in the workspace, since ZSTD_customFree{CCtx,
- *   CDict}() rely on a pointer comparison to see whether one or two frees are
- *   required.
- *
- * - Fixed size objects: these are fixed-size, fixed-count objects that are
- *   nonetheless "dynamically" allocated in the workspace so that we can
- *   control how they're initialized separately from the broader ZSTD_CCtx.
- *   Examples:
- *   - Entropy Workspace
- *   - 2 x ZSTD_compressedBlockState_t
- *   - CDict dictionary contents
- *
- * - Tables: these are any of several different datastructures (hash tables,
- *   chain tables, binary trees) that all respect a common format: they are
- *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
- *   Their sizes depend on the cparams. These tables are 64-byte aligned.
- *
- * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used. These
- *   buffers are each aligned to 64 bytes.
- *
- * - Buffers: these buffers are used for various purposes that don't require
- *   any alignment or initialization before they're used. This means they can
- *   be moved around at no cost for a new compression.
- *
- * Allocating Memory:
- *
- * The various types of objects must be allocated in order, so they can be
- * correctly packed into the workspace buffer. That order is:
- *
- * 1. Objects
- * 2. Buffers
- * 3. Aligned/Tables
- *
- * Attempts to reserve objects of different types out of order will fail.
- */
-typedef struct {
-    void* workspace;
-    void* workspaceEnd;
-
-    void* objectEnd;
-    void* tableEnd;
-    void* tableValidEnd;
-    void* allocStart;
-
-    BYTE allocFailed;
-    int workspaceOversizedDuration;
-    ZSTD_cwksp_alloc_phase_e phase;
-    ZSTD_cwksp_static_alloc_e isStatic;
-} ZSTD_cwksp;
-
-/*-*************************************
-*  Functions
-***************************************/
-
-MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
-
-MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
-    (void)ws;
-    assert(ws->workspace <= ws->objectEnd);
-    assert(ws->objectEnd <= ws->tableEnd);
-    assert(ws->objectEnd <= ws->tableValidEnd);
-    assert(ws->tableEnd <= ws->allocStart);
-    assert(ws->tableValidEnd <= ws->allocStart);
-    assert(ws->allocStart <= ws->workspaceEnd);
-}
-
-/**
- * Align must be a power of 2.
- */
-MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
-    size_t const mask = align - 1;
-    assert((align & mask) == 0);
-    return (size + mask) & ~mask;
-}
-
-/**
- * Use this to determine how much space in the workspace we will consume to
- * allocate this object. (Normally it should be exactly the size of the object,
- * but under special conditions, like ASAN, where we pad each object, it might
- * be larger.)
- *
- * Since tables aren't currently redzoned, you don't need to call through this
- * to figure out how much space you need for the matchState tables. Everything
- * else is though.
- *
- * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
- */
-MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
-    if (size == 0)
-        return 0;
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-#else
-    return size;
-#endif
-}
-
-/**
- * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
- * Used to determine the number of bytes required for a given "aligned".
- */
-MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
-    return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
-}
-
-/**
- * Returns the amount of additional space the cwksp must allocate
- * for internal purposes (currently only alignment).
- */
-MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
-    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
-     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
-     * to align the beginning of the aligned section.
-     *
-     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
-     * aligneds being sized in multiples of 64 bytes.
-     */
-    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
-    return slackSpace;
-}
-
-
-/**
- * Return the number of additional bytes required to align a pointer to the given number of bytes.
- * alignBytes must be a power of two.
- */
-MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
-    size_t const alignBytesMask = alignBytes - 1;
-    size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
-    assert((alignBytes & alignBytesMask) == 0);
-    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
-    return bytes;
-}
-
-/**
- * Internal function. Do not use directly.
- * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
- * which counts from the end of the wksp (as opposed to the object/table segment).
- *
- * Returns a pointer to the beginning of that space.
- */
-MEM_STATIC void*
-ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
-{
-    void* const alloc = (BYTE*)ws->allocStart - bytes;
-    void* const bottom = ws->tableEnd;
-    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(alloc >= bottom);
-    if (alloc < bottom) {
-        DEBUGLOG(4, "cwksp: alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    /* the area is reserved from the end of wksp.
-     * If it overlaps with tableValidEnd, it voids guarantees on values' range */
-    if (alloc < ws->tableValidEnd) {
-        ws->tableValidEnd = alloc;
-    }
-    ws->allocStart = alloc;
-    return alloc;
-}
-
-/**
- * Moves the cwksp to the next phase, and does any necessary allocations.
- * cwksp initialization must necessarily go through each phase in order.
- * Returns a 0 on success, or zstd error
- */
-MEM_STATIC size_t
-ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
-{
-    assert(phase >= ws->phase);
-    if (phase > ws->phase) {
-        /* Going from allocating objects to allocating buffers */
-        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
-                phase >= ZSTD_cwksp_alloc_buffers) {
-            ws->tableValidEnd = ws->objectEnd;
-        }
-
-        /* Going from allocating buffers to allocating aligneds/tables */
-        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
-                phase >= ZSTD_cwksp_alloc_aligned) {
-            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
-                size_t const bytesToAlign =
-                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
-                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
-                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
-                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
-                                memory_allocation, "aligned phase - alignment initial allocation failed!");
-            }
-            {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
-                void* const alloc = ws->objectEnd;
-                size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
-                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
-                DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
-                RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
-                                "table phase - alignment initial allocation failed!");
-                ws->objectEnd = objectEnd;
-                ws->tableEnd = objectEnd;  /* table area starts being empty */
-                if (ws->tableValidEnd < ws->tableEnd) {
-                    ws->tableValidEnd = ws->tableEnd;
-        }   }   }
-        ws->phase = phase;
-        ZSTD_cwksp_assert_internal_consistency(ws);
-    }
-    return 0;
-}
-
-/**
- * Returns whether this object/buffer/etc was allocated in this workspace.
- */
-MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
-{
-    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
-}
-
-/**
- * Internal function. Do not use directly.
- */
-MEM_STATIC void*
-ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
-{
-    void* alloc;
-    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
-        return NULL;
-    }
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* over-reserve space */
-    bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-#endif
-
-    alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
-     * either size. */
-    if (alloc) {
-        alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-        if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-            __asan_unpoison_memory_region(alloc, bytes);
-        }
-    }
-#endif
-
-    return alloc;
-}
-
-/**
- * Reserves and returns unaligned memory.
- */
-MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
-{
-    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
-}
-
-/**
- * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
- */
-MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
-{
-    void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
-                                            ZSTD_cwksp_alloc_aligned);
-    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
-    return ptr;
-}
-
-/**
- * Aligned on 64 bytes. These buffers have the special property that
- * their values remain constrained, allowing us to re-use them without
- * memset()-ing them.
- */
-MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
-{
-    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-    void* alloc;
-    void* end;
-    void* top;
-
-    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
-        return NULL;
-    }
-    alloc = ws->tableEnd;
-    end = (BYTE *)alloc + bytes;
-    top = ws->allocStart;
-
-    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    assert((bytes & (sizeof(U32)-1)) == 0);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(end <= top);
-    if (end > top) {
-        DEBUGLOG(4, "cwksp: table alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    ws->tableEnd = end;
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        __asan_unpoison_memory_region(alloc, bytes);
-    }
-#endif
-
-    assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
-    assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
-    return alloc;
-}
-
-/**
- * Aligned on sizeof(void*).
- * Note : should happen only once, at workspace first initialization
- */
-MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
-{
-    size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
-    void* alloc = ws->objectEnd;
-    void* end = (BYTE*)alloc + roundedBytes;
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* over-reserve space */
-    end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-#endif
-
-    DEBUGLOG(4,
-        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
-        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
-    assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
-    assert(bytes % ZSTD_ALIGNOF(void*) == 0);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    /* we must be in the first phase, no advance is possible */
-    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
-        DEBUGLOG(3, "cwksp: object alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    ws->objectEnd = end;
-    ws->tableEnd = end;
-    ws->tableValidEnd = end;
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
-     * either size. */
-    alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        __asan_unpoison_memory_region(alloc, bytes);
-    }
-#endif
-
-    return alloc;
-}
-
-MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
-{
-    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
-
-#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
-     * access table space that we haven't cleaned, we re-"poison" the table
-     * space every time we mark it dirty. */
-    {
-        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
-        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
-        __msan_poison(ws->objectEnd, size);
-    }
-#endif
-
-    assert(ws->tableValidEnd >= ws->objectEnd);
-    assert(ws->tableValidEnd <= ws->allocStart);
-    ws->tableValidEnd = ws->objectEnd;
-    ZSTD_cwksp_assert_internal_consistency(ws);
-}
-
-MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
-    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
-    assert(ws->tableValidEnd >= ws->objectEnd);
-    assert(ws->tableValidEnd <= ws->allocStart);
-    if (ws->tableValidEnd < ws->tableEnd) {
-        ws->tableValidEnd = ws->tableEnd;
-    }
-    ZSTD_cwksp_assert_internal_consistency(ws);
-}
-
-/**
- * Zero the part of the allocated tables not already marked clean.
- */
-MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
-    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
-    assert(ws->tableValidEnd >= ws->objectEnd);
-    assert(ws->tableValidEnd <= ws->allocStart);
-    if (ws->tableValidEnd < ws->tableEnd) {
-        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
-    }
-    ZSTD_cwksp_mark_tables_clean(ws);
-}
-
-/**
- * Invalidates table allocations.
- * All other allocations remain valid.
- */
-MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
-    DEBUGLOG(4, "cwksp: clearing tables!");
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* We don't do this when the workspace is statically allocated, because
-     * when that is the case, we have no capability to hook into the end of the
-     * workspace's lifecycle to unpoison the memory.
-     */
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
-        __asan_poison_memory_region(ws->objectEnd, size);
-    }
-#endif
-
-    ws->tableEnd = ws->objectEnd;
-    ZSTD_cwksp_assert_internal_consistency(ws);
-}
-
-/**
- * Invalidates all buffer, aligned, and table allocations.
- * Object allocations remain valid.
- */
-MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
-    DEBUGLOG(4, "cwksp: clearing!");
-
-#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the context re-use logic is sound, and that we don't
-     * access stuff that this compression hasn't initialized, we re-"poison"
-     * the workspace (or at least the non-static, non-table parts of it)
-     * every time we start a new compression. */
-    {
-        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
-        __msan_poison(ws->tableValidEnd, size);
-    }
-#endif
-
-#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
-    /* We don't do this when the workspace is statically allocated, because
-     * when that is the case, we have no capability to hook into the end of the
-     * workspace's lifecycle to unpoison the memory.
-     */
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
-        __asan_poison_memory_region(ws->objectEnd, size);
-    }
-#endif
-
-    ws->tableEnd = ws->objectEnd;
-    ws->allocStart = ws->workspaceEnd;
-    ws->allocFailed = 0;
-    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
-        ws->phase = ZSTD_cwksp_alloc_buffers;
-    }
-    ZSTD_cwksp_assert_internal_consistency(ws);
-}
-
-/**
- * The provided workspace takes ownership of the buffer [start, start+size).
- * Any existing values in the workspace are ignored (the previously managed
- * buffer, if present, must be separately freed).
- */
-MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
-    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
-    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
-    ws->workspace = start;
-    ws->workspaceEnd = (BYTE*)start + size;
-    ws->objectEnd = ws->workspace;
-    ws->tableValidEnd = ws->objectEnd;
-    ws->phase = ZSTD_cwksp_alloc_objects;
-    ws->isStatic = isStatic;
-    ZSTD_cwksp_clear(ws);
-    ws->workspaceOversizedDuration = 0;
-    ZSTD_cwksp_assert_internal_consistency(ws);
-}
-
-MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
-    void* workspace = ZSTD_customMalloc(size, customMem);
-    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
-    RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
-    ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
-    return 0;
-}
-
-MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
-    void *ptr = ws->workspace;
-    DEBUGLOG(4, "cwksp: freeing workspace");
-    ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
-    ZSTD_customFree(ptr, customMem);
-}
-
-/**
- * Moves the management of a workspace from one cwksp to another. The src cwksp
- * is left in an invalid state (src must be re-init()'ed before it's used again).
- */
-MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
-    *dst = *src;
-    ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
-}
-
-MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
-    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
-}
-
-MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
-    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
-         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
-}
-
-MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
-    return ws->allocFailed;
-}
-
-/*-*************************************
-*  Functions Checking Free Space
-***************************************/
-
-/* ZSTD_alignmentSpaceWithinBounds() :
- * Returns if the estimated space needed for a wksp is within an acceptable limit of the
- * actual amount of space used.
- */
-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
-                                                        size_t const estimatedSpace, int resizedWorkspace) {
-    if (resizedWorkspace) {
-        /* Resized/newly allocated wksp should have exact bounds */
-        return ZSTD_cwksp_used(ws) == estimatedSpace;
-    } else {
-        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
-         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
-         */
-        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
-    }
-}
-
-
-MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
-    return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
-}
-
-MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
-    return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
-}
-
-MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
-    return ZSTD_cwksp_check_available(
-        ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
-}
-
-MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
-    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
-        && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
-}
-
-MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
-        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
-    if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
-        ws->workspaceOversizedDuration++;
-    } else {
-        ws->workspaceOversizedDuration = 0;
-    }
-}
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_CWKSP_H */
diff --git a/dep/zstd/lib/compress/zstd_double_fast.c b/dep/zstd/lib/compress/zstd_double_fast.c
deleted file mode 100644
index 76933dea2..000000000
--- a/dep/zstd/lib/compress/zstd_double_fast.c
+++ /dev/null
@@ -1,696 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_compress_internal.h"
-#include "zstd_double_fast.h"
-
-
-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashLarge = ms->hashTable;
-    U32  const hBitsL = cParams->hashLog;
-    U32  const mls = cParams->minMatch;
-    U32* const hashSmall = ms->chainTable;
-    U32  const hBitsS = cParams->chainLog;
-    const BYTE* const base = ms->window.base;
-    const BYTE* ip = base + ms->nextToUpdate;
-    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-    const U32 fastHashFillStep = 3;
-
-    /* Always insert every fastHashFillStep position into the hash tables.
-     * Insert the other positions into the large hash table if their entry
-     * is empty.
-     */
-    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
-        U32 const curr = (U32)(ip - base);
-        U32 i;
-        for (i = 0; i < fastHashFillStep; ++i) {
-            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
-            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
-            if (i == 0)
-                hashSmall[smHash] = curr + i;
-            if (i == 0 || hashLarge[lgHash] == 0)
-                hashLarge[lgHash] = curr + i;
-            /* Only load extra positions for ZSTD_dtlm_full */
-            if (dtlm == ZSTD_dtlm_fast)
-                break;
-    }   }
-}
-
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_noDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls /* template */)
-{
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32* const hashLong = ms->hashTable;
-    const U32 hBitsL = cParams->hashLog;
-    U32* const hashSmall = ms->chainTable;
-    const U32 hBitsS = cParams->chainLog;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* anchor = istart;
-    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-    /* presumes that, if there is a dictionary, it must be using Attach mode */
-    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
-    const BYTE* const prefixLowest = base + prefixLowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=rep[0], offset_2=rep[1];
-    U32 offsetSaved = 0;
-
-    size_t mLength;
-    U32 offset;
-    U32 curr;
-
-    /* how many positions to search before increasing step size */
-    const size_t kStepIncr = 1 << kSearchStrength;
-    /* the position at which to increment the step size if no match is found */
-    const BYTE* nextStep;
-    size_t step; /* the current step size */
-
-    size_t hl0; /* the long hash at ip */
-    size_t hl1; /* the long hash at ip1 */
-
-    U32 idxl0; /* the long match index for ip */
-    U32 idxl1; /* the long match index for ip1 */
-
-    const BYTE* matchl0; /* the long match for ip */
-    const BYTE* matchs0; /* the short match for ip */
-    const BYTE* matchl1; /* the long match for ip1 */
-
-    const BYTE* ip = istart; /* the current position */
-    const BYTE* ip1; /* the next position */
-
-    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
-
-    /* init */
-    ip += ((ip - prefixLowest) == 0);
-    {
-        U32 const current = (U32)(ip - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
-        U32 const maxRep = current - windowLow;
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
-    }
-
-    /* Outer Loop: one iteration per match found and stored */
-    while (1) {
-        step = 1;
-        nextStep = ip + kStepIncr;
-        ip1 = ip + step;
-
-        if (ip1 > ilimit) {
-            goto _cleanup;
-        }
-
-        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
-        idxl0 = hashLong[hl0];
-        matchl0 = base + idxl0;
-
-        /* Inner Loop: one iteration per search / position */
-        do {
-            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
-            const U32 idxs0 = hashSmall[hs0];
-            curr = (U32)(ip-base);
-            matchs0 = base + idxs0;
-
-            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
-
-            /* check noDict repcode */
-            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
-                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-                ip++;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-                goto _match_stored;
-            }
-
-            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
-
-            if (idxl0 > prefixLowestIndex) {
-                /* check prefix long match */
-                if (MEM_read64(matchl0) == MEM_read64(ip)) {
-                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
-                    offset = (U32)(ip-matchl0);
-                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
-                    goto _match_found;
-                }
-            }
-
-            idxl1 = hashLong[hl1];
-            matchl1 = base + idxl1;
-
-            if (idxs0 > prefixLowestIndex) {
-                /* check prefix short match */
-                if (MEM_read32(matchs0) == MEM_read32(ip)) {
-                    goto _search_next_long;
-                }
-            }
-
-            if (ip1 >= nextStep) {
-                PREFETCH_L1(ip1 + 64);
-                PREFETCH_L1(ip1 + 128);
-                step++;
-                nextStep += kStepIncr;
-            }
-            ip = ip1;
-            ip1 += step;
-
-            hl0 = hl1;
-            idxl0 = idxl1;
-            matchl0 = matchl1;
-    #if defined(__aarch64__)
-            PREFETCH_L1(ip+256);
-    #endif
-        } while (ip1 <= ilimit);
-
-_cleanup:
-        /* save reps for next block */
-        rep[0] = offset_1 ? offset_1 : offsetSaved;
-        rep[1] = offset_2 ? offset_2 : offsetSaved;
-
-        /* Return the last literals size */
-        return (size_t)(iend - anchor);
-
-_search_next_long:
-
-        /* check prefix long +1 match */
-        if (idxl1 > prefixLowestIndex) {
-            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
-                ip = ip1;
-                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
-                offset = (U32)(ip-matchl1);
-                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
-                goto _match_found;
-            }
-        }
-
-        /* if no long +1 match, explore the short match we found */
-        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
-        offset = (U32)(ip - matchs0);
-        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
-
-        /* fall-through */
-
-_match_found: /* requires ip, offset, mLength */
-        offset_2 = offset_1;
-        offset_1 = offset;
-
-        if (step < 4) {
-            /* It is unsafe to write this value back to the hashtable when ip1 is
-             * greater than or equal to the new ip we will have after we're done
-             * processing this match. Rather than perform that test directly
-             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
-             * more predictable test. The minmatch even if we take a short match is
-             * 4 bytes, so as long as step, the distance between ip and ip1
-             * (initially) is less than 4, we know ip1 < new ip. */
-            hashLong[hl1] = (U32)(ip1 - base);
-        }
-
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-
-_match_stored:
-        /* match found */
-        ip += mLength;
-        anchor = ip;
-
-        if (ip <= ilimit) {
-            /* Complementary insertion */
-            /* done after iLimit test, as candidates could be > iend-8 */
-            {   U32 const indexToInsert = curr+2;
-                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
-                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
-                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
-                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
-            }
-
-            /* check immediate repcode */
-            while ( (ip <= ilimit)
-                 && ( (offset_2>0)
-                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
-                /* store sequence */
-                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
-                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
-                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
-                ip += rLength;
-                anchor = ip;
-                continue;   /* faster when present ... (?) */
-            }
-        }
-    }
-}
-
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize,
-        U32 const mls /* template */)
-{
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32* const hashLong = ms->hashTable;
-    const U32 hBitsL = cParams->hashLog;
-    U32* const hashSmall = ms->chainTable;
-    const U32 hBitsS = cParams->chainLog;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-    /* presumes that, if there is a dictionary, it must be using Attach mode */
-    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
-    const BYTE* const prefixLowest = base + prefixLowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=rep[0], offset_2=rep[1];
-    U32 offsetSaved = 0;
-
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
-    const U32* const dictHashLong  = dms->hashTable;
-    const U32* const dictHashSmall = dms->chainTable;
-    const U32 dictStartIndex       = dms->window.dictLimit;
-    const BYTE* const dictBase     = dms->window.base;
-    const BYTE* const dictStart    = dictBase + dictStartIndex;
-    const BYTE* const dictEnd      = dms->window.nextSrc;
-    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
-    const U32 dictHBitsL           = dictCParams->hashLog;
-    const U32 dictHBitsS           = dictCParams->chainLog;
-    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
-
-    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
-
-    /* if a dictionary is attached, it must be within window range */
-    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
-
-    /* init */
-    ip += (dictAndPrefixLength == 0);
-
-    /* dictMatchState repCode checks don't currently handle repCode == 0
-     * disabling. */
-    assert(offset_1 <= dictAndPrefixLength);
-    assert(offset_2 <= dictAndPrefixLength);
-
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
-        size_t mLength;
-        U32 offset;
-        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
-        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
-        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
-        U32 const curr = (U32)(ip-base);
-        U32 const matchIndexL = hashLong[h2];
-        U32 matchIndexS = hashSmall[h];
-        const BYTE* matchLong = base + matchIndexL;
-        const BYTE* match = base + matchIndexS;
-        const U32 repIndex = curr + 1 - offset_1;
-        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
-                               dictBase + (repIndex - dictIndexDelta) :
-                               base + repIndex;
-        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
-
-        /* check repcode */
-        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
-            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-            goto _match_stored;
-        }
-
-        if (matchIndexL > prefixLowestIndex) {
-            /* check prefix long match */
-            if (MEM_read64(matchLong) == MEM_read64(ip)) {
-                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
-                offset = (U32)(ip-matchLong);
-                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
-                goto _match_found;
-            }
-        } else {
-            /* check dictMatchState long match */
-            U32 const dictMatchIndexL = dictHashLong[dictHL];
-            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
-            assert(dictMatchL < dictEnd);
-
-            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
-                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
-                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
-                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
-                goto _match_found;
-        }   }
-
-        if (matchIndexS > prefixLowestIndex) {
-            /* check prefix short match */
-            if (MEM_read32(match) == MEM_read32(ip)) {
-                goto _search_next_long;
-            }
-        } else {
-            /* check dictMatchState short match */
-            U32 const dictMatchIndexS = dictHashSmall[dictHS];
-            match = dictBase + dictMatchIndexS;
-            matchIndexS = dictMatchIndexS + dictIndexDelta;
-
-            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
-                goto _search_next_long;
-        }   }
-
-        ip += ((ip-anchor) >> kSearchStrength) + 1;
-#if defined(__aarch64__)
-        PREFETCH_L1(ip+256);
-#endif
-        continue;
-
-_search_next_long:
-
-        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
-            U32 const matchIndexL3 = hashLong[hl3];
-            const BYTE* matchL3 = base + matchIndexL3;
-            hashLong[hl3] = curr + 1;
-
-            /* check prefix long +1 match */
-            if (matchIndexL3 > prefixLowestIndex) {
-                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
-                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
-                    ip++;
-                    offset = (U32)(ip-matchL3);
-                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
-                    goto _match_found;
-                }
-            } else {
-                /* check dict long +1 match */
-                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
-                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
-                assert(dictMatchL3 < dictEnd);
-                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
-                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
-                    ip++;
-                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
-                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
-                    goto _match_found;
-        }   }   }
-
-        /* if no long +1 match, explore the short match we found */
-        if (matchIndexS < prefixLowestIndex) {
-            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
-            offset = (U32)(curr - matchIndexS);
-            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
-        } else {
-            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
-            offset = (U32)(ip - match);
-            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
-        }
-
-_match_found:
-        offset_2 = offset_1;
-        offset_1 = offset;
-
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-
-_match_stored:
-        /* match found */
-        ip += mLength;
-        anchor = ip;
-
-        if (ip <= ilimit) {
-            /* Complementary insertion */
-            /* done after iLimit test, as candidates could be > iend-8 */
-            {   U32 const indexToInsert = curr+2;
-                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
-                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
-                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
-                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
-            }
-
-            /* check immediate repcode */
-            while (ip <= ilimit) {
-                U32 const current2 = (U32)(ip-base);
-                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
-                        dictBase + repIndex2 - dictIndexDelta :
-                        base + repIndex2;
-                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
-                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
-                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
-                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                    ip += repLength2;
-                    anchor = ip;
-                    continue;
-                }
-                break;
-            }
-        }
-    }   /* while (ip < ilimit) */
-
-    /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
-    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
-            void const* src, size_t srcSize)                                                             \
-    {                                                                                                    \
-        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
-    }
-
-ZSTD_GEN_DFAST_FN(noDict, 4)
-ZSTD_GEN_DFAST_FN(noDict, 5)
-ZSTD_GEN_DFAST_FN(noDict, 6)
-ZSTD_GEN_DFAST_FN(noDict, 7)
-
-ZSTD_GEN_DFAST_FN(dictMatchState, 4)
-ZSTD_GEN_DFAST_FN(dictMatchState, 5)
-ZSTD_GEN_DFAST_FN(dictMatchState, 6)
-ZSTD_GEN_DFAST_FN(dictMatchState, 7)
-
-
-size_t ZSTD_compressBlock_doubleFast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    const U32 mls = ms->cParams.minMatch;
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
-    case 5 :
-        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
-    case 6 :
-        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
-    case 7 :
-        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
-    }
-}
-
-
-size_t ZSTD_compressBlock_doubleFast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    const U32 mls = ms->cParams.minMatch;
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
-    case 5 :
-        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
-    case 6 :
-        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
-    case 7 :
-        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
-    }
-}
-
-
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize,
-        U32 const mls /* template */)
-{
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32* const hashLong = ms->hashTable;
-    U32  const hBitsL = cParams->hashLog;
-    U32* const hashSmall = ms->chainTable;
-    U32  const hBitsS = cParams->chainLog;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
-    const BYTE* const base = ms->window.base;
-    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
-    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
-    const U32   dictStartIndex = lowLimit;
-    const U32   dictLimit = ms->window.dictLimit;
-    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
-    const BYTE* const prefixStart = base + prefixStartIndex;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const BYTE* const dictStart = dictBase + dictStartIndex;
-    const BYTE* const dictEnd = dictBase + prefixStartIndex;
-    U32 offset_1=rep[0], offset_2=rep[1];
-
-    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
-
-    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
-    if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
-
-    /* Search Loop */
-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
-        const U32 matchIndex = hashSmall[hSmall];
-        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
-        const BYTE* match = matchBase + matchIndex;
-
-        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
-        const U32 matchLongIndex = hashLong[hLong];
-        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
-        const BYTE* matchLong = matchLongBase + matchLongIndex;
-
-        const U32 curr = (U32)(ip-base);
-        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
-        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
-        const BYTE* const repMatch = repBase + repIndex;
-        size_t mLength;
-        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
-
-        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
-          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-        } else {
-            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
-                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
-                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
-                U32 offset;
-                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
-                offset = curr - matchLongIndex;
-                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-
-            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
-                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-                U32 const matchIndex3 = hashLong[h3];
-                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
-                const BYTE* match3 = match3Base + matchIndex3;
-                U32 offset;
-                hashLong[h3] = curr + 1;
-                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
-                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
-                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
-                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
-                    ip++;
-                    offset = curr+1 - matchIndex3;
-                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
-                } else {
-                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
-                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
-                    offset = curr - matchIndex;
-                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
-                }
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-
-            } else {
-                ip += ((ip-anchor) >> kSearchStrength) + 1;
-                continue;
-        }   }
-
-        /* move to next sequence start */
-        ip += mLength;
-        anchor = ip;
-
-        if (ip <= ilimit) {
-            /* Complementary insertion */
-            /* done after iLimit test, as candidates could be > iend-8 */
-            {   U32 const indexToInsert = curr+2;
-                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
-                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
-                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
-                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
-            }
-
-            /* check immediate repcode */
-            while (ip <= ilimit) {
-                U32 const current2 = (U32)(ip-base);
-                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (offset_2 <= current2 - dictStartIndex))
-                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
-                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                    ip += repLength2;
-                    anchor = ip;
-                    continue;
-                }
-                break;
-    }   }   }
-
-    /* save reps for next block */
-    rep[0] = offset_1;
-    rep[1] = offset_2;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-ZSTD_GEN_DFAST_FN(extDict, 4)
-ZSTD_GEN_DFAST_FN(extDict, 5)
-ZSTD_GEN_DFAST_FN(extDict, 6)
-ZSTD_GEN_DFAST_FN(extDict, 7)
-
-size_t ZSTD_compressBlock_doubleFast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    U32 const mls = ms->cParams.minMatch;
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
-    case 5 :
-        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
-    case 6 :
-        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
-    case 7 :
-        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
-    }
-}
diff --git a/dep/zstd/lib/compress/zstd_double_fast.h b/dep/zstd/lib/compress/zstd_double_fast.h
deleted file mode 100644
index e16b7b03a..000000000
--- a/dep/zstd/lib/compress/zstd_double_fast.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_DOUBLE_FAST_H
-#define ZSTD_DOUBLE_FAST_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include "../common/mem.h"      /* U32 */
-#include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
-
-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
-                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
-size_t ZSTD_compressBlock_doubleFast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_doubleFast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_doubleFast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_DOUBLE_FAST_H */
diff --git a/dep/zstd/lib/compress/zstd_fast.c b/dep/zstd/lib/compress/zstd_fast.c
deleted file mode 100644
index 802fc3157..000000000
--- a/dep/zstd/lib/compress/zstd_fast.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
-#include "zstd_fast.h"
-
-
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-                        const void* const end,
-                        ZSTD_dictTableLoadMethod_e dtlm)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashTable = ms->hashTable;
-    U32  const hBits = cParams->hashLog;
-    U32  const mls = cParams->minMatch;
-    const BYTE* const base = ms->window.base;
-    const BYTE* ip = base + ms->nextToUpdate;
-    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
-    const U32 fastHashFillStep = 3;
-
-    /* Always insert every fastHashFillStep position into the hash table.
-     * Insert the other positions if their hash entry is empty.
-     */
-    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
-        U32 const curr = (U32)(ip - base);
-        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
-        hashTable[hash0] = curr;
-        if (dtlm == ZSTD_dtlm_fast) continue;
-        /* Only load extra positions for ZSTD_dtlm_full */
-        {   U32 p;
-            for (p = 1; p < fastHashFillStep; ++p) {
-                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
-                if (hashTable[hash] == 0) {  /* not yet filled */
-                    hashTable[hash] = curr + p;
-    }   }   }   }
-}
-
-
-/**
- * If you squint hard enough (and ignore repcodes), the search operation at any
- * given position is broken into 4 stages:
- *
- * 1. Hash   (map position to hash value via input read)
- * 2. Lookup (map hash val to index via hashtable read)
- * 3. Load   (map index to value at that position via input read)
- * 4. Compare
- *
- * Each of these steps involves a memory read at an address which is computed
- * from the previous step. This means these steps must be sequenced and their
- * latencies are cumulative.
- *
- * Rather than do 1->2->3->4 sequentially for a single position before moving
- * onto the next, this implementation interleaves these operations across the
- * next few positions:
- *
- * R = Repcode Read & Compare
- * H = Hash
- * T = Table Lookup
- * M = Match Read & Compare
- *
- * Pos | Time -->
- * ----+-------------------
- * N   | ... M
- * N+1 | ...   TM
- * N+2 |    R H   T M
- * N+3 |         H    TM
- * N+4 |           R H   T M
- * N+5 |                H   ...
- * N+6 |                  R ...
- *
- * This is very much analogous to the pipelining of execution in a CPU. And just
- * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
- * branch).
- *
- * When this happens, we throw away our current state, and do the following prep
- * to re-enter the loop:
- *
- * Pos | Time -->
- * ----+-------------------
- * N   | H T
- * N+1 |  H
- *
- * This is also the work we do at the beginning to enter the loop initially.
- */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_noDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize,
-        U32 const mls, U32 const hasStep)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashTable = ms->hashTable;
-    U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const istart = (const BYTE*)src;
-    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
-    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
-    const BYTE* const prefixStart = base + prefixStartIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - HASH_READ_SIZE;
-
-    const BYTE* anchor = istart;
-    const BYTE* ip0 = istart;
-    const BYTE* ip1;
-    const BYTE* ip2;
-    const BYTE* ip3;
-    U32 current0;
-
-    U32 rep_offset1 = rep[0];
-    U32 rep_offset2 = rep[1];
-    U32 offsetSaved = 0;
-
-    size_t hash0; /* hash for ip0 */
-    size_t hash1; /* hash for ip1 */
-    U32 idx; /* match idx for ip0 */
-    U32 mval; /* src value at match idx */
-
-    U32 offcode;
-    const BYTE* match0;
-    size_t mLength;
-
-    /* ip0 and ip1 are always adjacent. The targetLength skipping and
-     * uncompressibility acceleration is applied to every other position,
-     * matching the behavior of #1562. step therefore represents the gap
-     * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
-    size_t step;
-    const BYTE* nextStep;
-    const size_t kStepIncr = (1 << (kSearchStrength - 1));
-
-    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
-    ip0 += (ip0 == prefixStart);
-    {   U32 const curr = (U32)(ip0 - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
-        U32 const maxRep = curr - windowLow;
-        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
-        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
-    }
-
-    /* start each op */
-_start: /* Requires: ip0 */
-
-    step = stepSize;
-    nextStep = ip0 + kStepIncr;
-
-    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
-    ip1 = ip0 + 1;
-    ip2 = ip0 + step;
-    ip3 = ip2 + 1;
-
-    if (ip3 >= ilimit) {
-        goto _cleanup;
-    }
-
-    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
-    hash1 = ZSTD_hashPtr(ip1, hlog, mls);
-
-    idx = hashTable[hash0];
-
-    do {
-        /* load repcode match for ip[2]*/
-        const U32 rval = MEM_read32(ip2 - rep_offset1);
-
-        /* write back hash table entry */
-        current0 = (U32)(ip0 - base);
-        hashTable[hash0] = current0;
-
-        /* check repcode at ip[2] */
-        if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
-            ip0 = ip2;
-            match0 = ip0 - rep_offset1;
-            mLength = ip0[-1] == match0[-1];
-            ip0 -= mLength;
-            match0 -= mLength;
-            offcode = STORE_REPCODE_1;
-            mLength += 4;
-            goto _match;
-        }
-
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-            goto _offset;
-        }
-
-        /* lookup ip[1] */
-        idx = hashTable[hash1];
-
-        /* hash ip[2] */
-        hash0 = hash1;
-        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
-
-        /* advance to next positions */
-        ip0 = ip1;
-        ip1 = ip2;
-        ip2 = ip3;
-
-        /* write back hash table entry */
-        current0 = (U32)(ip0 - base);
-        hashTable[hash0] = current0;
-
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-            goto _offset;
-        }
-
-        /* lookup ip[1] */
-        idx = hashTable[hash1];
-
-        /* hash ip[2] */
-        hash0 = hash1;
-        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
-
-        /* advance to next positions */
-        ip0 = ip1;
-        ip1 = ip2;
-        ip2 = ip0 + step;
-        ip3 = ip1 + step;
-
-        /* calculate step */
-        if (ip2 >= nextStep) {
-            step++;
-            PREFETCH_L1(ip1 + 64);
-            PREFETCH_L1(ip1 + 128);
-            nextStep += kStepIncr;
-        }
-    } while (ip3 < ilimit);
-
-_cleanup:
-    /* Note that there are probably still a couple positions we could search.
-     * However, it seems to be a meaningful performance hit to try to search
-     * them. So let's not. */
-
-    /* save reps for next block */
-    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
-    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-
-_offset: /* Requires: ip0, idx */
-
-    /* Compute the offset code. */
-    match0 = base + idx;
-    rep_offset2 = rep_offset1;
-    rep_offset1 = (U32)(ip0-match0);
-    offcode = STORE_OFFSET(rep_offset1);
-    mLength = 4;
-
-    /* Count the backwards match length. */
-    while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
-        ip0--;
-        match0--;
-        mLength++;
-    }
-
-_match: /* Requires: ip0, match0, offcode */
-
-    /* Count the forward length. */
-    mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
-
-    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
-
-    ip0 += mLength;
-    anchor = ip0;
-
-    /* write next hash table entry */
-    if (ip1 < ip0) {
-        hashTable[hash1] = (U32)(ip1 - base);
-    }
-
-    /* Fill table and check for immediate repcode. */
-    if (ip0 <= ilimit) {
-        /* Fill Table */
-        assert(base+current0+2 > istart);  /* check base overflow */
-        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
-        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
-
-        if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
-            while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
-                /* store sequence */
-                size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
-                { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
-                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
-                ip0 += rLength;
-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
-                anchor = ip0;
-                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
-    }   }   }
-
-    goto _start;
-}
-
-#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
-    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                      \
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
-            void const* src, size_t srcSize)                                                       \
-    {                                                                                              \
-        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
-    }
-
-ZSTD_GEN_FAST_FN(noDict, 4, 1)
-ZSTD_GEN_FAST_FN(noDict, 5, 1)
-ZSTD_GEN_FAST_FN(noDict, 6, 1)
-ZSTD_GEN_FAST_FN(noDict, 7, 1)
-
-ZSTD_GEN_FAST_FN(noDict, 4, 0)
-ZSTD_GEN_FAST_FN(noDict, 5, 0)
-ZSTD_GEN_FAST_FN(noDict, 6, 0)
-ZSTD_GEN_FAST_FN(noDict, 7, 0)
-
-size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    U32 const mls = ms->cParams.minMatch;
-    assert(ms->dictMatchState == NULL);
-    if (ms->cParams.targetLength > 1) {
-        switch(mls)
-        {
-        default: /* includes case 3 */
-        case 4 :
-            return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
-        case 5 :
-            return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
-        case 6 :
-            return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
-        case 7 :
-            return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
-        }
-    } else {
-        switch(mls)
-        {
-        default: /* includes case 3 */
-        case 4 :
-            return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
-        case 5 :
-            return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
-        case 6 :
-            return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
-        case 7 :
-            return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
-        }
-
-    }
-}
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashTable = ms->hashTable;
-    U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
-    const BYTE* const base = ms->window.base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32   prefixStartIndex = ms->window.dictLimit;
-    const BYTE* const prefixStart = base + prefixStartIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=rep[0], offset_2=rep[1];
-    U32 offsetSaved = 0;
-
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
-    const U32* const dictHashTable = dms->hashTable;
-    const U32 dictStartIndex       = dms->window.dictLimit;
-    const BYTE* const dictBase     = dms->window.base;
-    const BYTE* const dictStart    = dictBase + dictStartIndex;
-    const BYTE* const dictEnd      = dms->window.nextSrc;
-    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
-    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
-    const U32 dictHLog             = dictCParams->hashLog;
-
-    /* if a dictionary is still attached, it necessarily means that
-     * it is within window size. So we just check it. */
-    const U32 maxDistance = 1U << cParams->windowLog;
-    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
-    assert(endIndex - prefixStartIndex <= maxDistance);
-    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
-
-    (void)hasStep; /* not currently specialized on whether it's accelerated */
-
-    /* ensure there will be no underflow
-     * when translating a dict index into a local index */
-    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
-
-    /* init */
-    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
-    ip += (dictAndPrefixLength == 0);
-    /* dictMatchState repCode checks don't currently handle repCode == 0
-     * disabling. */
-    assert(offset_1 <= dictAndPrefixLength);
-    assert(offset_2 <= dictAndPrefixLength);
-
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
-        size_t mLength;
-        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
-        U32 const curr = (U32)(ip-base);
-        U32 const matchIndex = hashTable[h];
-        const BYTE* match = base + matchIndex;
-        const U32 repIndex = curr + 1 - offset_1;
-        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
-                               dictBase + (repIndex - dictIndexDelta) :
-                               base + repIndex;
-        hashTable[h] = curr;   /* update hash table */
-
-        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
-          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
-        } else if ( (matchIndex <= prefixStartIndex) ) {
-            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
-            U32 const dictMatchIndex = dictHashTable[dictHash];
-            const BYTE* dictMatch = dictBase + dictMatchIndex;
-            if (dictMatchIndex <= dictStartIndex ||
-                MEM_read32(dictMatch) != MEM_read32(ip)) {
-                assert(stepSize >= 1);
-                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
-                continue;
-            } else {
-                /* found a dict match */
-                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
-                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
-                while (((ip>anchor) & (dictMatch>dictStart))
-                     && (ip[-1] == dictMatch[-1])) {
-                    ip--; dictMatch--; mLength++;
-                } /* catch up */
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-            }
-        } else if (MEM_read32(match) != MEM_read32(ip)) {
-            /* it's not a match, and we're not going to check the dictionary */
-            assert(stepSize >= 1);
-            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
-            continue;
-        } else {
-            /* found a regular match */
-            U32 const offset = (U32)(ip-match);
-            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
-            while (((ip>anchor) & (match>prefixStart))
-                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
-            offset_2 = offset_1;
-            offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-        }
-
-        /* match found */
-        ip += mLength;
-        anchor = ip;
-
-        if (ip <= ilimit) {
-            /* Fill Table */
-            assert(base+curr+2 > istart);  /* check base overflow */
-            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
-            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
-
-            /* check immediate repcode */
-            while (ip <= ilimit) {
-                U32 const current2 = (U32)(ip-base);
-                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
-                        dictBase - dictIndexDelta + repIndex2 :
-                        base + repIndex2;
-                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
-                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
-                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
-                    ip += repLength2;
-                    anchor = ip;
-                    continue;
-                }
-                break;
-            }
-        }
-    }
-
-    /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-
-ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
-ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
-ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
-ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
-
-size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    U32 const mls = ms->cParams.minMatch;
-    assert(ms->dictMatchState != NULL);
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
-    case 5 :
-        return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
-    case 6 :
-        return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
-    case 7 :
-        return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
-    }
-}
-
-
-static size_t ZSTD_compressBlock_fast_extDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashTable = ms->hashTable;
-    U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
-    const BYTE* const base = ms->window.base;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
-    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
-    const U32   dictStartIndex = lowLimit;
-    const BYTE* const dictStart = dictBase + dictStartIndex;
-    const U32   dictLimit = ms->window.dictLimit;
-    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
-    const BYTE* const prefixStart = base + prefixStartIndex;
-    const BYTE* const dictEnd = dictBase + prefixStartIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
-    U32 offset_1=rep[0], offset_2=rep[1];
-
-    (void)hasStep; /* not currently specialized on whether it's accelerated */
-
-    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
-
-    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
-    if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
-
-    /* Search Loop */
-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
-        const U32    matchIndex = hashTable[h];
-        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
-        const BYTE*  match = matchBase + matchIndex;
-        const U32    curr = (U32)(ip-base);
-        const U32    repIndex = curr + 1 - offset_1;
-        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
-        const BYTE* const repMatch = repBase + repIndex;
-        hashTable[h] = curr;   /* update hash table */
-        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
-
-        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
-             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
-           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
-            ip += rLength;
-            anchor = ip;
-        } else {
-            if ( (matchIndex < dictStartIndex) ||
-                 (MEM_read32(match) != MEM_read32(ip)) ) {
-                assert(stepSize >= 1);
-                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
-                continue;
-            }
-            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
-                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-                U32 const offset = curr - matchIndex;
-                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
-                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
-                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
-                ip += mLength;
-                anchor = ip;
-        }   }
-
-        if (ip <= ilimit) {
-            /* Fill Table */
-            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
-            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
-            /* check immediate repcode */
-            while (ip <= ilimit) {
-                U32 const current2 = (U32)(ip-base);
-                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
-                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
-                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
-                    ip += repLength2;
-                    anchor = ip;
-                    continue;
-                }
-                break;
-    }   }   }
-
-    /* save reps for next block */
-    rep[0] = offset_1;
-    rep[1] = offset_2;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-ZSTD_GEN_FAST_FN(extDict, 4, 0)
-ZSTD_GEN_FAST_FN(extDict, 5, 0)
-ZSTD_GEN_FAST_FN(extDict, 6, 0)
-ZSTD_GEN_FAST_FN(extDict, 7, 0)
-
-size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    U32 const mls = ms->cParams.minMatch;
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
-    case 5 :
-        return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
-    case 6 :
-        return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
-    case 7 :
-        return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
-    }
-}
diff --git a/dep/zstd/lib/compress/zstd_fast.h b/dep/zstd/lib/compress/zstd_fast.h
deleted file mode 100644
index 0d4a0c109..000000000
--- a/dep/zstd/lib/compress/zstd_fast.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_FAST_H
-#define ZSTD_FAST_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include "../common/mem.h"      /* U32 */
-#include "zstd_compress_internal.h"
-
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
-size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_FAST_H */
diff --git a/dep/zstd/lib/compress/zstd_lazy.c b/dep/zstd/lib/compress/zstd_lazy.c
deleted file mode 100644
index 2e38dcb46..000000000
--- a/dep/zstd/lib/compress/zstd_lazy.c
+++ /dev/null
@@ -1,2104 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_compress_internal.h"
-#include "zstd_lazy.h"
-
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-
-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
-                const BYTE* ip, const BYTE* iend,
-                U32 mls)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const hashTable = ms->hashTable;
-    U32  const hashLog = cParams->hashLog;
-
-    U32* const bt = ms->chainTable;
-    U32  const btLog  = cParams->chainLog - 1;
-    U32  const btMask = (1 << btLog) - 1;
-
-    const BYTE* const base = ms->window.base;
-    U32 const target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-
-    if (idx != target)
-        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
-                    idx, target, ms->window.dictLimit);
-    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
-    (void)iend;
-
-    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
-    for ( ; idx < target ; idx++) {
-        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
-        U32    const matchIndex = hashTable[h];
-
-        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
-        U32*   const sortMarkPtr  = nextCandidatePtr + 1;
-
-        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
-        hashTable[h] = idx;   /* Update Hash Table */
-        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
-        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
-    }
-    ms->nextToUpdate = target;
-}
-
-
-/** ZSTD_insertDUBT1() :
- *  sort one already inserted but unsorted position
- *  assumption : curr >= btlow == (curr - btmask)
- *  doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
-                 U32 curr, const BYTE* inputEnd,
-                 U32 nbCompares, U32 btLow,
-                 const ZSTD_dictMode_e dictMode)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const bt = ms->chainTable;
-    U32  const btLog  = cParams->chainLog - 1;
-    U32  const btMask = (1 << btLog) - 1;
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const U32 dictLimit = ms->window.dictLimit;
-    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
-    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match;
-    U32* smallerPtr = bt + 2*(curr&btMask);
-    U32* largerPtr  = smallerPtr + 1;
-    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
-    U32 dummy32;   /* to be nullified at the end */
-    U32 const windowValid = ms->window.lowLimit;
-    U32 const maxDistance = 1U << cParams->windowLog;
-    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
-
-
-    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
-                curr, dictLimit, windowLow);
-    assert(curr >= btLow);
-    assert(ip < iend);   /* condition for ZSTD_count */
-
-    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
-        U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        assert(matchIndex < curr);
-        /* note : all candidates are now supposed sorted,
-         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
-         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
-
-        if ( (dictMode != ZSTD_extDict)
-          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
-          || (curr < dictLimit) /* both in extDict */) {
-            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
-                                     || (matchIndex+matchLength >= dictLimit)) ?
-                                        base : dictBase;
-            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
-                 || (curr < dictLimit) );
-            match = mBase + matchIndex;
-            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
-        } else {
-            match = dictBase + matchIndex;
-            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
-            if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
-        }
-
-        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
-                    curr, matchIndex, (U32)matchLength);
-
-        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
-            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
-        }
-
-        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
-            /* match is smaller than current */
-            *smallerPtr = matchIndex;             /* update smaller idx */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
-                        matchIndex, btLow, nextPtr[1]);
-            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
-            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
-        } else {
-            /* match is larger than current */
-            *largerPtr = matchIndex;
-            commonLengthLarger = matchLength;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
-                        matchIndex, btLow, nextPtr[0]);
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-    }   }
-
-    *smallerPtr = *largerPtr = 0;
-}
-
-
-static size_t
-ZSTD_DUBT_findBetterDictMatch (
-        const ZSTD_matchState_t* ms,
-        const BYTE* const ip, const BYTE* const iend,
-        size_t* offsetPtr,
-        size_t bestLength,
-        U32 nbCompares,
-        U32 const mls,
-        const ZSTD_dictMode_e dictMode)
-{
-    const ZSTD_matchState_t * const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
-    const U32 * const dictHashTable = dms->hashTable;
-    U32         const hashLog = dmsCParams->hashLog;
-    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32               dictMatchIndex = dictHashTable[h];
-
-    const BYTE* const base = ms->window.base;
-    const BYTE* const prefixStart = base + ms->window.dictLimit;
-    U32         const curr = (U32)(ip-base);
-    const BYTE* const dictBase = dms->window.base;
-    const BYTE* const dictEnd = dms->window.nextSrc;
-    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
-    U32         const dictLowLimit = dms->window.lowLimit;
-    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
-
-    U32*        const dictBt = dms->chainTable;
-    U32         const btLog  = dmsCParams->chainLog - 1;
-    U32         const btMask = (1 << btLog) - 1;
-    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
-
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-
-    (void)dictMode;
-    assert(dictMode == ZSTD_dictMatchState);
-
-    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
-        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        const BYTE* match = dictBase + dictMatchIndex;
-        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
-        if (dictMatchIndex+matchLength >= dictHighLimit)
-            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
-
-        if (matchLength > bestLength) {
-            U32 matchIndex = dictMatchIndex + dictIndexDelta;
-            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
-                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
-                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
-                bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
-            }
-            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
-                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-            }
-        }
-
-        if (match[matchLength] < ip[matchLength]) {
-            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-        } else {
-            /* match is larger than current */
-            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
-            commonLengthLarger = matchLength;
-            dictMatchIndex = nextPtr[0];
-        }
-    }
-
-    if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
-        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
-                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
-    }
-    return bestLength;
-
-}
-
-
-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
-                        const BYTE* const ip, const BYTE* const iend,
-                        size_t* offsetPtr,
-                        U32 const mls,
-                        const ZSTD_dictMode_e dictMode)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32*   const hashTable = ms->hashTable;
-    U32    const hashLog = cParams->hashLog;
-    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32          matchIndex  = hashTable[h];
-
-    const BYTE* const base = ms->window.base;
-    U32    const curr = (U32)(ip-base);
-    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
-
-    U32*   const bt = ms->chainTable;
-    U32    const btLog  = cParams->chainLog - 1;
-    U32    const btMask = (1 << btLog) - 1;
-    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
-    U32    const unsortLimit = MAX(btLow, windowLow);
-
-    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
-    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
-    U32          nbCompares = 1U << cParams->searchLog;
-    U32          nbCandidates = nbCompares;
-    U32          previousCandidate = 0;
-
-    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
-    assert(ip <= iend-8);   /* required for h calculation */
-    assert(dictMode != ZSTD_dedicatedDictSearch);
-
-    /* reach end of unsorted candidates list */
-    while ( (matchIndex > unsortLimit)
-         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
-         && (nbCandidates > 1) ) {
-        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
-                    matchIndex);
-        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
-        previousCandidate = matchIndex;
-        matchIndex = *nextCandidate;
-        nextCandidate = bt + 2*(matchIndex&btMask);
-        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
-        nbCandidates --;
-    }
-
-    /* nullify last candidate if it's still unsorted
-     * simplification, detrimental to compression ratio, beneficial for speed */
-    if ( (matchIndex > unsortLimit)
-      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
-        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
-                    matchIndex);
-        *nextCandidate = *unsortedMark = 0;
-    }
-
-    /* batch sort stacked candidates */
-    matchIndex = previousCandidate;
-    while (matchIndex) {  /* will end on matchIndex == 0 */
-        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
-        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
-        ZSTD_insertDUBT1(ms, matchIndex, iend,
-                         nbCandidates, unsortLimit, dictMode);
-        matchIndex = nextCandidateIdx;
-        nbCandidates++;
-    }
-
-    /* find longest match */
-    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-        const BYTE* const dictBase = ms->window.dictBase;
-        const U32 dictLimit = ms->window.dictLimit;
-        const BYTE* const dictEnd = dictBase + dictLimit;
-        const BYTE* const prefixStart = base + dictLimit;
-        U32* smallerPtr = bt + 2*(curr&btMask);
-        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
-        U32 matchEndIdx = curr + 8 + 1;
-        U32 dummy32;   /* to be nullified at the end */
-        size_t bestLength = 0;
-
-        matchIndex  = hashTable[h];
-        hashTable[h] = curr;   /* Update Hash Table */
-
-        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
-            U32* const nextPtr = bt + 2*(matchIndex & btMask);
-            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-            const BYTE* match;
-
-            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
-                match = base + matchIndex;
-                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
-            } else {
-                match = dictBase + matchIndex;
-                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
-                if (matchIndex+matchLength >= dictLimit)
-                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
-            }
-
-            if (matchLength > bestLength) {
-                if (matchLength > matchEndIdx - matchIndex)
-                    matchEndIdx = matchIndex + (U32)matchLength;
-                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
-                    bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
-                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
-                    if (dictMode == ZSTD_dictMatchState) {
-                        nbCompares = 0; /* in addition to avoiding checking any
-                                         * further in this loop, make sure we
-                                         * skip checking in the dictionary. */
-                    }
-                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-                }
-            }
-
-            if (match[matchLength] < ip[matchLength]) {
-                /* match is smaller than current */
-                *smallerPtr = matchIndex;             /* update smaller idx */
-                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
-                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-            } else {
-                /* match is larger than current */
-                *largerPtr = matchIndex;
-                commonLengthLarger = matchLength;
-                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-                largerPtr = nextPtr;
-                matchIndex = nextPtr[0];
-        }   }
-
-        *smallerPtr = *largerPtr = 0;
-
-        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
-        if (dictMode == ZSTD_dictMatchState && nbCompares) {
-            bestLength = ZSTD_DUBT_findBetterDictMatch(
-                    ms, ip, iend,
-                    offsetPtr, bestLength, nbCompares,
-                    mls, dictMode);
-        }
-
-        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
-        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
-        if (bestLength >= MINMATCH) {
-            U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
-            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
-                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
-        }
-        return bestLength;
-    }
-}
-
-
-/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
-                const BYTE* const ip, const BYTE* const iLimit,
-                      size_t* offsetPtr,
-                const U32 mls /* template */,
-                const ZSTD_dictMode_e dictMode)
-{
-    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
-    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateDUBT(ms, ip, iLimit, mls);
-    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
-}
-
-/***********************************
-* Dedicated dict search
-***********************************/
-
-void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
-{
-    const BYTE* const base = ms->window.base;
-    U32 const target = (U32)(ip - base);
-    U32* const hashTable = ms->hashTable;
-    U32* const chainTable = ms->chainTable;
-    U32 const chainSize = 1 << ms->cParams.chainLog;
-    U32 idx = ms->nextToUpdate;
-    U32 const minChain = chainSize < target - idx ? target - chainSize : idx;
-    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
-    U32 const cacheSize = bucketSize - 1;
-    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
-    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
-
-    /* We know the hashtable is oversized by a factor of `bucketSize`.
-     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
-     * single entry. We will use the rest of the space to construct a temporary
-     * chaintable.
-     */
-    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
-    U32* const tmpHashTable = hashTable;
-    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
-    U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
-    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
-    U32 hashIdx;
-
-    assert(ms->cParams.chainLog <= 24);
-    assert(ms->cParams.hashLog > ms->cParams.chainLog);
-    assert(idx != 0);
-    assert(tmpMinChain <= minChain);
-
-    /* fill conventional hash table and conventional chain table */
-    for ( ; idx < target; idx++) {
-        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
-        if (idx >= tmpMinChain) {
-            tmpChainTable[idx - tmpMinChain] = hashTable[h];
-        }
-        tmpHashTable[h] = idx;
-    }
-
-    /* sort chains into ddss chain table */
-    {
-        U32 chainPos = 0;
-        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
-            U32 count;
-            U32 countBeyondMinChain = 0;
-            U32 i = tmpHashTable[hashIdx];
-            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
-                /* skip through the chain to the first position that won't be
-                 * in the hash cache bucket */
-                if (i < minChain) {
-                    countBeyondMinChain++;
-                }
-                i = tmpChainTable[i - tmpMinChain];
-            }
-            if (count == cacheSize) {
-                for (count = 0; count < chainLimit;) {
-                    if (i < minChain) {
-                        if (!i || ++countBeyondMinChain > cacheSize) {
-                            /* only allow pulling `cacheSize` number of entries
-                             * into the cache or chainTable beyond `minChain`,
-                             * to replace the entries pulled out of the
-                             * chainTable into the cache. This lets us reach
-                             * back further without increasing the total number
-                             * of entries in the chainTable, guaranteeing the
-                             * DDSS chain table will fit into the space
-                             * allocated for the regular one. */
-                            break;
-                        }
-                    }
-                    chainTable[chainPos++] = i;
-                    count++;
-                    if (i < tmpMinChain) {
-                        break;
-                    }
-                    i = tmpChainTable[i - tmpMinChain];
-                }
-            } else {
-                count = 0;
-            }
-            if (count) {
-                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
-            } else {
-                tmpHashTable[hashIdx] = 0;
-            }
-        }
-        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
-    }
-
-    /* move chain pointers into the last entry of each hash bucket */
-    for (hashIdx = (1 << hashLog); hashIdx; ) {
-        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
-        U32 const chainPackedPointer = tmpHashTable[hashIdx];
-        U32 i;
-        for (i = 0; i < cacheSize; i++) {
-            hashTable[bucketIdx + i] = 0;
-        }
-        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
-    }
-
-    /* fill the buckets of the hash table */
-    for (idx = ms->nextToUpdate; idx < target; idx++) {
-        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
-                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
-        U32 i;
-        /* Shift hash cache down 1. */
-        for (i = cacheSize - 1; i; i--)
-            hashTable[h + i] = hashTable[h + i - 1];
-        hashTable[h] = idx;
-    }
-
-    ms->nextToUpdate = target;
-}
-
-/* Returns the longest match length found in the dedicated dict search structure.
- * If none are longer than the argument ml, then ml will be returned.
- */
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
-                                            const ZSTD_matchState_t* const dms,
-                                            const BYTE* const ip, const BYTE* const iLimit,
-                                            const BYTE* const prefixStart, const U32 curr,
-                                            const U32 dictLimit, const size_t ddsIdx) {
-    const U32 ddsLowestIndex  = dms->window.dictLimit;
-    const BYTE* const ddsBase = dms->window.base;
-    const BYTE* const ddsEnd  = dms->window.nextSrc;
-    const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
-    const U32 ddsIndexDelta   = dictLimit - ddsSize;
-    const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
-    const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
-    U32 ddsAttempt;
-    U32 matchIndex;
-
-    for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
-        PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
-    }
-
-    {
-        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-        U32 const chainIndex = chainPackedPointer >> 8;
-
-        PREFETCH_L1(&dms->chainTable[chainIndex]);
-    }
-
-    for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
-        size_t currentMl=0;
-        const BYTE* match;
-        matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
-        match = ddsBase + matchIndex;
-
-        if (!matchIndex) {
-            return ml;
-        }
-
-        /* guaranteed by table construction */
-        (void)ddsLowestIndex;
-        assert(matchIndex >= ddsLowestIndex);
-        assert(match+4 <= ddsEnd);
-        if (MEM_read32(match) == MEM_read32(ip)) {
-            /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-            currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-        }
-
-        /* save best solution */
-        if (currentMl > ml) {
-            ml = currentMl;
-            *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
-            if (ip+currentMl == iLimit) {
-                /* best possible, avoids read overflow on next attempt */
-                return ml;
-            }
-        }
-    }
-
-    {
-        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-        U32 chainIndex = chainPackedPointer >> 8;
-        U32 const chainLength = chainPackedPointer & 0xFF;
-        U32 const chainAttempts = nbAttempts - ddsAttempt;
-        U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
-        U32 chainAttempt;
-
-        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
-            PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
-        }
-
-        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
-            size_t currentMl=0;
-            const BYTE* match;
-            matchIndex = dms->chainTable[chainIndex];
-            match = ddsBase + matchIndex;
-
-            /* guaranteed by table construction */
-            assert(matchIndex >= ddsLowestIndex);
-            assert(match+4 <= ddsEnd);
-            if (MEM_read32(match) == MEM_read32(ip)) {
-                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-            }
-
-            /* save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
-                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-            }
-        }
-    }
-    return ml;
-}
-
-
-/* *********************************
-*  Hash Chain
-***********************************/
-#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-                        ZSTD_matchState_t* ms,
-                        const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
-{
-    U32* const hashTable  = ms->hashTable;
-    const U32 hashLog = cParams->hashLog;
-    U32* const chainTable = ms->chainTable;
-    const U32 chainMask = (1 << cParams->chainLog) - 1;
-    const BYTE* const base = ms->window.base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-
-    while(idx < target) { /* catch up */
-        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    ms->nextToUpdate = target;
-    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-}
-
-/* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_HcFindBestMatch(
-                        ZSTD_matchState_t* ms,
-                        const BYTE* const ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 mls, const ZSTD_dictMode_e dictMode)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32* const chainTable = ms->chainTable;
-    const U32 chainSize = (1 << cParams->chainLog);
-    const U32 chainMask = chainSize-1;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const U32 dictLimit = ms->window.dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const U32 curr = (U32)(ip-base);
-    const U32 maxDistance = 1U << cParams->windowLog;
-    const U32 lowestValid = ms->window.lowLimit;
-    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    const U32 isDictionary = (ms->loadedDictEnd != 0);
-    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
-    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
-    U32 nbAttempts = 1U << cParams->searchLog;
-    size_t ml=4-1;
-
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
-                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
-    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
-                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
-
-    U32 matchIndex;
-
-    if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32* entry = &dms->hashTable[ddsIdx];
-        PREFETCH_L1(entry);
-    }
-
-    /* HC4 match finder */
-    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
-
-    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
-        size_t currentMl=0;
-        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
-            const BYTE* const match = base + matchIndex;
-            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
-            if (match[ml] == ip[ml])   /* potentially better */
-                currentMl = ZSTD_count(ip, match, iLimit);
-        } else {
-            const BYTE* const match = dictBase + matchIndex;
-            assert(match+4 <= dictEnd);
-            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
-        }
-
-        /* save best solution */
-        if (currentMl > ml) {
-            ml = currentMl;
-            *offsetPtr = STORE_OFFSET(curr - matchIndex);
-            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-        }
-
-        if (matchIndex <= minChain) break;
-        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-    }
-
-    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
-    if (dictMode == ZSTD_dedicatedDictSearch) {
-        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
-                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
-    } else if (dictMode == ZSTD_dictMatchState) {
-        const U32* const dmsChainTable = dms->chainTable;
-        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
-        const U32 dmsChainMask         = dmsChainSize - 1;
-        const U32 dmsLowestIndex       = dms->window.dictLimit;
-        const BYTE* const dmsBase      = dms->window.base;
-        const BYTE* const dmsEnd       = dms->window.nextSrc;
-        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
-        const U32 dmsIndexDelta        = dictLimit - dmsSize;
-        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
-
-        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
-
-        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
-            size_t currentMl=0;
-            const BYTE* const match = dmsBase + matchIndex;
-            assert(match+4 <= dmsEnd);
-            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
-
-            /* save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                assert(curr > matchIndex + dmsIndexDelta);
-                *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
-                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-            }
-
-            if (matchIndex <= dmsMinChain) break;
-
-            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
-        }
-    }
-
-    return ml;
-}
-
-/* *********************************
-* (SIMD) Row-based matchfinder
-***********************************/
-/* Constants for row-based hash */
-#define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
-#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
-#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
-#define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
-
-#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
-
-typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 representing a mask of matches */
-
-/* ZSTD_VecMask_next():
- * Starting from the LSB, returns the idx of the next non-zero bit.
- * Basically counting the nb of trailing zeroes.
- */
-static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
-    assert(val != 0);
-#   if defined(_MSC_VER) && defined(_WIN64)
-        if (val != 0) {
-            unsigned long r;
-            _BitScanForward64(&r, val);
-            return (U32)(r);
-        } else {
-            /* Should not reach this code path */
-            __assume(0);
-        }
-#   elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
-    if (sizeof(size_t) == 4) {
-        U32 mostSignificantWord = (U32)(val >> 32);
-        U32 leastSignificantWord = (U32)val;
-        if (leastSignificantWord == 0) {
-            return 32 + (U32)__builtin_ctz(mostSignificantWord);
-        } else {
-            return (U32)__builtin_ctz(leastSignificantWord);
-        }
-    } else {
-        return (U32)__builtin_ctzll(val);
-    }
-#   else
-    /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
-     * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
-     */
-    val = ~val & (val - 1ULL); /* Lowest set bit mask */
-    val = val - ((val >> 1) & 0x5555555555555555);
-    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
-    return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
-#   endif
-}
-
-/* ZSTD_rotateRight_*():
- * Rotates a bitfield to the right by "count" bits.
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
- */
-FORCE_INLINE_TEMPLATE
-U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
-    assert(count < 64);
-    count &= 0x3F; /* for fickle pattern recognition */
-    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
-    assert(count < 32);
-    count &= 0x1F; /* for fickle pattern recognition */
-    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
-}
-
-FORCE_INLINE_TEMPLATE
-U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
-    assert(count < 16);
-    count &= 0x0F; /* for fickle pattern recognition */
-    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
-}
-
-/* ZSTD_row_nextIndex():
- * Returns the next index to insert at within a tagTable row, and updates the "head"
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
- */
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
-  U32 const next = (*tagRow - 1) & rowMask;
-  *tagRow = (BYTE)next;
-  return next;
-}
-
-/* ZSTD_isAligned():
- * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
- */
-MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
-    assert((align & (align - 1)) == 0);
-    return (((size_t)ptr) & (align - 1)) == 0;
-}
-
-/* ZSTD_row_prefetch():
- * Performs prefetching for the hashTable and tagTable at a given row.
- */
-FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
-    PREFETCH_L1(hashTable + relRow);
-    if (rowLog >= 5) {
-        PREFETCH_L1(hashTable + relRow + 16);
-        /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */
-    }
-    PREFETCH_L1(tagTable + relRow);
-    if (rowLog == 6) {
-        PREFETCH_L1(tagTable + relRow + 32);
-    }
-    assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
-    assert(ZSTD_isAligned(hashTable + relRow, 64));                 /* prefetched hash row always 64-byte aligned */
-    assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */
-}
-
-/* ZSTD_row_fillHashCache():
- * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
- * but not beyond iLimit.
- */
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
-                                   U32 const rowLog, U32 const mls,
-                                   U32 idx, const BYTE* const iLimit)
-{
-    U32 const* const hashTable = ms->hashTable;
-    U16 const* const tagTable = ms->tagTable;
-    U32 const hashLog = ms->rowHashLog;
-    U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
-    U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
-
-    for (; idx < lim; ++idx) {
-        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-        U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-        ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
-        ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
-    }
-
-    DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
-                                                     ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
-                                                     ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
-}
-
-/* ZSTD_row_nextCachedHash():
- * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
- * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
- */
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
-                                                  U16 const* tagTable, BYTE const* base,
-                                                  U32 idx, U32 const hashLog,
-                                                  U32 const rowLog, U32 const mls)
-{
-    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-    U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-    ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
-    {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
-        cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
-        return hash;
-    }
-}
-
-/* ZSTD_row_update_internalImpl():
- * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
- */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-                                                        U32 updateStartIdx, U32 const updateEndIdx,
-                                                        U32 const mls, U32 const rowLog,
-                                                        U32 const rowMask, U32 const useCache)
-{
-    U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
-    U32 const hashLog = ms->rowHashLog;
-    const BYTE* const base = ms->window.base;
-
-    DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
-    for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
-        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
-                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-        U32* const row = hashTable + relRow;
-        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
-                                                       Explicit cast allows us to get exact desired position within each row */
-        U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
-
-        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
-        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
-        row[pos] = updateStartIdx;
-    }
-}
-
-/* ZSTD_row_update_internal():
- * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
- * Skips sections of long matches as is necessary.
- */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
-                                                    U32 const mls, U32 const rowLog,
-                                                    U32 const rowMask, U32 const useCache)
-{
-    U32 idx = ms->nextToUpdate;
-    const BYTE* const base = ms->window.base;
-    const U32 target = (U32)(ip - base);
-    const U32 kSkipThreshold = 384;
-    const U32 kMaxMatchStartPositionsToUpdate = 96;
-    const U32 kMaxMatchEndPositionsToUpdate = 32;
-
-    if (useCache) {
-        /* Only skip positions when using hash cache, i.e.
-         * if we are loading a dict, don't skip anything.
-         * If we decide to skip, then we only update a set number
-         * of positions at the beginning and end of the match.
-         */
-        if (UNLIKELY(target - idx > kSkipThreshold)) {
-            U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
-            ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
-            idx = target - kMaxMatchEndPositionsToUpdate;
-            ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
-        }
-    }
-    assert(target >= idx);
-    ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
-    ms->nextToUpdate = target;
-}
-
-/* ZSTD_row_update():
- * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
- * processing.
- */
-void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
-    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
-    const U32 rowMask = (1u << rowLog) - 1;
-    const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
-
-    DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
-}
-
-#if defined(ZSTD_ARCH_X86_SSE2)
-FORCE_INLINE_TEMPLATE ZSTD_VecMask
-ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
-{
-    const __m128i comparisonMask = _mm_set1_epi8((char)tag);
-    int matches[4] = {0};
-    int i;
-    assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
-    for (i=0; i<nbChunks; i++) {
-        const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
-        const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
-        matches[i] = _mm_movemask_epi8(equalMask);
-    }
-    if (nbChunks == 1) return ZSTD_rotateRight_U16((U16)matches[0], head);
-    if (nbChunks == 2) return ZSTD_rotateRight_U32((U32)matches[1] << 16 | (U32)matches[0], head);
-    assert(nbChunks == 4);
-    return ZSTD_rotateRight_U64((U64)matches[3] << 48 | (U64)matches[2] << 32 | (U64)matches[1] << 16 | (U64)matches[0], head);
-}
-#endif
-
-/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
- * the hash at the nth position in a row of the tagTable.
- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
- * to match up with the actual layout of the entries within the hashTable */
-FORCE_INLINE_TEMPLATE ZSTD_VecMask
-ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
-{
-    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
-    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
-    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
-
-#if defined(ZSTD_ARCH_X86_SSE2)
-
-    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
-
-#else /* SW or NEON-LE */
-
-# if defined(ZSTD_ARCH_ARM_NEON)
-  /* This NEON path only works for little endian - otherwise use SWAR below */
-    if (MEM_isLittleEndian()) {
-        if (rowEntries == 16) {
-            const uint8x16_t chunk = vld1q_u8(src);
-            const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
-            const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
-            const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
-            const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
-            const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
-            const U16 hi = (U16)vgetq_lane_u8(t3, 8);
-            const U16 lo = (U16)vgetq_lane_u8(t3, 0);
-            return ZSTD_rotateRight_U16((hi << 8) | lo, head);
-        } else if (rowEntries == 32) {
-            const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
-            const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
-            const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
-            const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
-            const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
-            const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
-            const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
-            const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
-            const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
-            const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
-            const uint8x8x2_t t3 = vuzp_u8(t2, t0);
-            const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
-            const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
-            return ZSTD_rotateRight_U32(matches, head);
-        } else { /* rowEntries == 64 */
-            const uint8x16x4_t chunk = vld4q_u8(src);
-            const uint8x16_t dup = vdupq_n_u8(tag);
-            const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
-            const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
-            const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
-            const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
-
-            const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
-            const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
-            const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
-            const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
-            const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
-            const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
-            return ZSTD_rotateRight_U64(matches, head);
-        }
-    }
-# endif /* ZSTD_ARCH_ARM_NEON */
-    /* SWAR */
-    {   const size_t chunkSize = sizeof(size_t);
-        const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
-        const size_t xFF = ~((size_t)0);
-        const size_t x01 = xFF / 0xFF;
-        const size_t x80 = x01 << 7;
-        const size_t splatChar = tag * x01;
-        ZSTD_VecMask matches = 0;
-        int i = rowEntries - chunkSize;
-        assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8));
-        if (MEM_isLittleEndian()) { /* runtime check so have two loops */
-            const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
-            do {
-                size_t chunk = MEM_readST(&src[i]);
-                chunk ^= splatChar;
-                chunk = (((chunk | x80) - x01) | chunk) & x80;
-                matches <<= chunkSize;
-                matches |= (chunk * extractMagic) >> shiftAmount;
-                i -= chunkSize;
-            } while (i >= 0);
-        } else { /* big endian: reverse bits during extraction */
-            const size_t msb = xFF ^ (xFF >> 1);
-            const size_t extractMagic = (msb / 0x1FF) | msb;
-            do {
-                size_t chunk = MEM_readST(&src[i]);
-                chunk ^= splatChar;
-                chunk = (((chunk | x80) - x01) | chunk) & x80;
-                matches <<= chunkSize;
-                matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
-                i -= chunkSize;
-            } while (i >= 0);
-        }
-        matches = ~matches;
-        if (rowEntries == 16) {
-            return ZSTD_rotateRight_U16((U16)matches, head);
-        } else if (rowEntries == 32) {
-            return ZSTD_rotateRight_U32((U32)matches, head);
-        } else {
-            return ZSTD_rotateRight_U64((U64)matches, head);
-        }
-    }
-#endif
-}
-
-/* The high-level approach of the SIMD row based match finder is as follows:
- * - Figure out where to insert the new entry:
- *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
- *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
- *        which row to insert into.
- *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
- *        be considered as a circular buffer with a "head" index that resides in the tagTable.
- *      - Also insert the "tag" into the equivalent row and position in the tagTable.
- *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
- *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- *                  for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
- *   generate a bitfield that we can cycle through to check the collisions in the hash table.
- * - Pick the longest match.
- */
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_RowFindBestMatch(
-                        ZSTD_matchState_t* ms,
-                        const BYTE* const ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 mls, const ZSTD_dictMode_e dictMode,
-                        const U32 rowLog)
-{
-    U32* const hashTable = ms->hashTable;
-    U16* const tagTable = ms->tagTable;
-    U32* const hashCache = ms->hashCache;
-    const U32 hashLog = ms->rowHashLog;
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const U32 dictLimit = ms->window.dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const U32 curr = (U32)(ip-base);
-    const U32 maxDistance = 1U << cParams->windowLog;
-    const U32 lowestValid = ms->window.lowLimit;
-    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    const U32 isDictionary = (ms->loadedDictEnd != 0);
-    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
-    const U32 rowEntries = (1U << rowLog);
-    const U32 rowMask = rowEntries - 1;
-    const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
-    U32 nbAttempts = 1U << cappedSearchLog;
-    size_t ml=4-1;
-
-    /* DMS/DDS variables that may be referenced laster */
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-
-    /* Initialize the following variables to satisfy static analyzer */
-    size_t ddsIdx = 0;
-    U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
-    U32 dmsTag = 0;
-    U32* dmsRow = NULL;
-    BYTE* dmsTagRow = NULL;
-
-    if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
-        {   /* Prefetch DDS hashtable entry */
-            ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
-            PREFETCH_L1(&dms->hashTable[ddsIdx]);
-        }
-        ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
-    }
-
-    if (dictMode == ZSTD_dictMatchState) {
-        /* Prefetch DMS rows */
-        U32* const dmsHashTable = dms->hashTable;
-        U16* const dmsTagTable = dms->tagTable;
-        U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
-        U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-        dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
-        dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
-        dmsRow = dmsHashTable + dmsRelRow;
-        ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
-    }
-
-    /* Update the hashTable and tagTable up to (but not including) ip */
-    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
-    {   /* Get the hash for ip, compute the appropriate row */
-        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
-        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
-        U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
-        U32* const row = hashTable + relRow;
-        BYTE* tagRow = (BYTE*)(tagTable + relRow);
-        U32 const head = *tagRow & rowMask;
-        U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
-        size_t numMatches = 0;
-        size_t currMatch = 0;
-        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
-
-        /* Cycle through the matches and prefetch */
-        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
-            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
-            U32 const matchIndex = row[matchPos];
-            assert(numMatches < rowEntries);
-            if (matchIndex < lowLimit)
-                break;
-            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
-                PREFETCH_L1(base + matchIndex);
-            } else {
-                PREFETCH_L1(dictBase + matchIndex);
-            }
-            matchBuffer[numMatches++] = matchIndex;
-        }
-
-        /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
-           in ZSTD_row_update_internal() at the next search. */
-        {
-            U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
-            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
-            row[pos] = ms->nextToUpdate++;
-        }
-
-        /* Return the longest match */
-        for (; currMatch < numMatches; ++currMatch) {
-            U32 const matchIndex = matchBuffer[currMatch];
-            size_t currentMl=0;
-            assert(matchIndex < curr);
-            assert(matchIndex >= lowLimit);
-
-            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
-                const BYTE* const match = base + matchIndex;
-                assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
-                if (match[ml] == ip[ml])   /* potentially better */
-                    currentMl = ZSTD_count(ip, match, iLimit);
-            } else {
-                const BYTE* const match = dictBase + matchIndex;
-                assert(match+4 <= dictEnd);
-                if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
-            }
-
-            /* Save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                *offsetPtr = STORE_OFFSET(curr - matchIndex);
-                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-            }
-        }
-    }
-
-    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
-    if (dictMode == ZSTD_dedicatedDictSearch) {
-        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
-                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
-    } else if (dictMode == ZSTD_dictMatchState) {
-        /* TODO: Measure and potentially add prefetching to DMS */
-        const U32 dmsLowestIndex       = dms->window.dictLimit;
-        const BYTE* const dmsBase      = dms->window.base;
-        const BYTE* const dmsEnd       = dms->window.nextSrc;
-        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
-        const U32 dmsIndexDelta        = dictLimit - dmsSize;
-
-        {   U32 const head = *dmsTagRow & rowMask;
-            U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
-            size_t numMatches = 0;
-            size_t currMatch = 0;
-            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
-
-            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
-                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
-                U32 const matchIndex = dmsRow[matchPos];
-                if (matchIndex < dmsLowestIndex)
-                    break;
-                PREFETCH_L1(dmsBase + matchIndex);
-                matchBuffer[numMatches++] = matchIndex;
-            }
-
-            /* Return the longest match */
-            for (; currMatch < numMatches; ++currMatch) {
-                U32 const matchIndex = matchBuffer[currMatch];
-                size_t currentMl=0;
-                assert(matchIndex >= dmsLowestIndex);
-                assert(matchIndex < curr);
-
-                {   const BYTE* const match = dmsBase + matchIndex;
-                    assert(match+4 <= dmsEnd);
-                    if (MEM_read32(match) == MEM_read32(ip))
-                        currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
-                }
-
-                if (currentMl > ml) {
-                    ml = currentMl;
-                    assert(curr > matchIndex + dmsIndexDelta);
-                    *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
-                    if (ip+currentMl == iLimit) break;
-                }
-            }
-        }
-    }
-    return ml;
-}
-
-
-typedef size_t (*searchMax_f)(
-                    ZSTD_matchState_t* ms,
-                    const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-
-/**
- * This struct contains the functions necessary for lazy to search.
- * Currently, that is only searchMax. However, it is still valuable to have the
- * VTable because this makes it easier to add more functions to the VTable later.
- *
- * TODO: The start of the search function involves loading and calculating a
- * bunch of constants from the ZSTD_matchState_t. These computations could be
- * done in an initialization function, and saved somewhere in the match state.
- * Then we could pass a pointer to the saved state instead of the match state,
- * and avoid duplicate computations.
- *
- * TODO: Move the match re-winding into searchMax. This improves compression
- * ratio, and unlocks further simplifications with the next TODO.
- *
- * TODO: Try moving the repcode search into searchMax. After the re-winding
- * and repcode search are in searchMax, there is no more logic in the match
- * finder loop that requires knowledge about the dictMode. So we should be
- * able to avoid force inlining it, and we can join the extDict loop with
- * the single segment loop. It should go in searchMax instead of its own
- * function to avoid having multiple virtual function calls per search.
- */
-typedef struct {
-    searchMax_f searchMax;
-} ZSTD_LazyVTable;
-
-#define GEN_ZSTD_BT_VTABLE(dictMode, mls)                                             \
-    static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls(                            \
-            ZSTD_matchState_t* ms,                                                    \
-            const BYTE* ip, const BYTE* const iLimit,                                 \
-            size_t* offsetPtr)                                                        \
-    {                                                                                 \
-        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                          \
-        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
-    }                                                                                 \
-    static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = {                 \
-        ZSTD_BtFindBestMatch_##dictMode##_##mls                                       \
-    };
-
-#define GEN_ZSTD_HC_VTABLE(dictMode, mls)                                             \
-    static size_t ZSTD_HcFindBestMatch_##dictMode##_##mls(                            \
-            ZSTD_matchState_t* ms,                                                    \
-            const BYTE* ip, const BYTE* const iLimit,                                 \
-            size_t* offsetPtr)                                                        \
-    {                                                                                 \
-        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                          \
-        return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
-    }                                                                                 \
-    static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = {                 \
-        ZSTD_HcFindBestMatch_##dictMode##_##mls                                       \
-    };
-
-#define GEN_ZSTD_ROW_VTABLE(dictMode, mls, rowLog)                                             \
-    static size_t ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog(                         \
-            ZSTD_matchState_t* ms,                                                             \
-            const BYTE* ip, const BYTE* const iLimit,                                          \
-            size_t* offsetPtr)                                                                 \
-    {                                                                                          \
-        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                                   \
-        assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog);                               \
-        return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
-    }                                                                                          \
-    static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = {              \
-        ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog                                    \
-    };
-
-#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
-    X(dictMode, mls, 4)                        \
-    X(dictMode, mls, 5)                        \
-    X(dictMode, mls, 6)
-
-#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \
-    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4)      \
-    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5)      \
-    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6)
-
-#define ZSTD_FOR_EACH_MLS(X, dictMode) \
-    X(dictMode, 4)                     \
-    X(dictMode, 5)                     \
-    X(dictMode, 6)
-
-#define ZSTD_FOR_EACH_DICT_MODE(X, ...) \
-    X(__VA_ARGS__, noDict)              \
-    X(__VA_ARGS__, extDict)             \
-    X(__VA_ARGS__, dictMatchState)      \
-    X(__VA_ARGS__, dedicatedDictSearch)
-
-/* Generate Row VTables for each combination of (dictMode, mls, rowLog) */
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_VTABLE)
-/* Generate Binary Tree VTables for each combination of (dictMode, mls) */
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_VTABLE)
-/* Generate Hash Chain VTables for each combination of (dictMode, mls) */
-ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_VTABLE)
-
-#define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \
-    {                                      \
-        &ZSTD_BtVTable_##dictMode##_4,     \
-        &ZSTD_BtVTable_##dictMode##_5,     \
-        &ZSTD_BtVTable_##dictMode##_6      \
-    }
-
-#define GEN_ZSTD_HC_VTABLE_ARRAY(dictMode) \
-    {                                      \
-        &ZSTD_HcVTable_##dictMode##_4,     \
-        &ZSTD_HcVTable_##dictMode##_5,     \
-        &ZSTD_HcVTable_##dictMode##_6      \
-    }
-
-#define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \
-    {                                             \
-        &ZSTD_RowVTable_##dictMode##_##mls##_4,   \
-        &ZSTD_RowVTable_##dictMode##_##mls##_5,   \
-        &ZSTD_RowVTable_##dictMode##_##mls##_6    \
-    }
-
-#define GEN_ZSTD_ROW_VTABLE_ARRAY(dictMode)      \
-    {                                            \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \
-        GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6)  \
-    }
-
-#define GEN_ZSTD_VTABLE_ARRAY(X) \
-    {                            \
-        X(noDict),               \
-        X(extDict),              \
-        X(dictMatchState),       \
-        X(dedicatedDictSearch)   \
-    }
-
-/* *******************************
-*  Common parser - lazy strategy
-*********************************/
-typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
-
-/**
- * This table is indexed first by the four ZSTD_dictMode_e values, and then
- * by the two searchMethod_e values. NULLs are placed for configurations
- * that should never occur (extDict modes go to the other implementation
- * below and there is no DDSS for binary tree search yet).
- */
-
-static ZSTD_LazyVTable const*
-ZSTD_selectLazyVTable(ZSTD_matchState_t const* ms, searchMethod_e searchMethod, ZSTD_dictMode_e dictMode)
-{
-    /* Fill the Hc/Bt VTable arrays with the right functions for the (dictMode, mls) combination. */
-    ZSTD_LazyVTable const* const hcVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_HC_VTABLE_ARRAY);
-    ZSTD_LazyVTable const* const btVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_BT_VTABLE_ARRAY);
-    /* Fill the Row VTable array with the right functions for the (dictMode, mls, rowLog) combination. */
-    ZSTD_LazyVTable const* const rowVTables[4][3][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_ROW_VTABLE_ARRAY);
-
-    U32 const mls = MAX(4, MIN(6, ms->cParams.minMatch));
-    U32 const rowLog = MAX(4, MIN(6, ms->cParams.searchLog));
-    switch (searchMethod) {
-        case search_hashChain:
-            return hcVTables[dictMode][mls - 4];
-        case search_binaryTree:
-            return btVTables[dictMode][mls - 4];
-        case search_rowHash:
-            return rowVTables[dictMode][mls - 4][rowLog - 4];
-        default:
-            return NULL;
-    }
-}
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_lazy_generic(
-                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
-                        U32 rep[ZSTD_REP_NUM],
-                        const void* src, size_t srcSize,
-                        const searchMethod_e searchMethod, const U32 depth,
-                        ZSTD_dictMode_e const dictMode)
-{
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
-    const BYTE* const base = ms->window.base;
-    const U32 prefixLowestIndex = ms->window.dictLimit;
-    const BYTE* const prefixLowest = base + prefixLowestIndex;
-
-    searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax;
-    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
-
-    const int isDMS = dictMode == ZSTD_dictMatchState;
-    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
-    const int isDxS = isDMS || isDDS;
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const U32 dictLowestIndex      = isDxS ? dms->window.dictLimit : 0;
-    const BYTE* const dictBase     = isDxS ? dms->window.base : NULL;
-    const BYTE* const dictLowest   = isDxS ? dictBase + dictLowestIndex : NULL;
-    const BYTE* const dictEnd      = isDxS ? dms->window.nextSrc : NULL;
-    const U32 dictIndexDelta       = isDxS ?
-                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
-                                     0;
-    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
-
-    assert(searchMax != NULL);
-
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
-    ip += (dictAndPrefixLength == 0);
-    if (dictMode == ZSTD_noDict) {
-        U32 const curr = (U32)(ip - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
-        U32 const maxRep = curr - windowLow;
-        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
-    }
-    if (isDxS) {
-        /* dictMatchState repCode checks don't currently handle repCode == 0
-         * disabling. */
-        assert(offset_1 <= dictAndPrefixLength);
-        assert(offset_2 <= dictAndPrefixLength);
-    }
-
-    if (searchMethod == search_rowHash) {
-        const U32 rowLog = MAX(4, MIN(6, ms->cParams.searchLog));
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                            ms->nextToUpdate, ilimit);
-    }
-
-    /* Match Loop */
-#if defined(__GNUC__) && defined(__x86_64__)
-    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
-     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
-     */
-    __asm__(".p2align 5");
-#endif
-    while (ip < ilimit) {
-        size_t matchLength=0;
-        size_t offcode=STORE_REPCODE_1;
-        const BYTE* start=ip+1;
-        DEBUGLOG(7, "search baseline (depth 0)");
-
-        /* check repCode */
-        if (isDxS) {
-            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
-            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
-                                && repIndex < prefixLowestIndex) ?
-                                   dictBase + (repIndex - dictIndexDelta) :
-                                   base + repIndex;
-            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
-                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-                if (depth==0) goto _storeSequence;
-            }
-        }
-        if ( dictMode == ZSTD_noDict
-          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
-            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-            if (depth==0) goto _storeSequence;
-        }
-
-        /* first search (depth 0) */
-        {   size_t offsetFound = 999999999;
-            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
-            if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offcode=offsetFound;
-        }
-
-        if (matchLength < 4) {
-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
-            continue;
-        }
-
-        /* let's try to find a better solution */
-        if (depth>=1)
-        while (ip<ilimit) {
-            DEBUGLOG(7, "search depth 1");
-            ip ++;
-            if ( (dictMode == ZSTD_noDict)
-              && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
-                int const gain2 = (int)(mlRep * 3);
-                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-            }
-            if (isDxS) {
-                const U32 repIndex = (U32)(ip - base) - offset_1;
-                const BYTE* repMatch = repIndex < prefixLowestIndex ?
-                               dictBase + (repIndex - dictIndexDelta) :
-                               base + repIndex;
-                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
-                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
-                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-                    int const gain2 = (int)(mlRep * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                    if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-                }
-            }
-            {   size_t offset2=999999999;
-                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
-                if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offcode = offset2, start = ip;
-                    continue;   /* search a better one */
-            }   }
-
-            /* let's find an even better one */
-            if ((depth==2) && (ip<ilimit)) {
-                DEBUGLOG(7, "search depth 2");
-                ip ++;
-                if ( (dictMode == ZSTD_noDict)
-                  && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
-                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
-                    int const gain2 = (int)(mlRep * 4);
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                    if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-                }
-                if (isDxS) {
-                    const U32 repIndex = (U32)(ip - base) - offset_1;
-                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
-                                   dictBase + (repIndex - dictIndexDelta) :
-                                   base + repIndex;
-                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
-                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
-                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
-                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
-                        int const gain2 = (int)(mlRep * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                        if ((mlRep >= 4) && (gain2 > gain1))
-                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
-                    }
-                }
-                {   size_t offset2=999999999;
-                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
-                    if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offcode = offset2, start = ip;
-                        continue;
-            }   }   }
-            break;  /* nothing found : store previous solution */
-        }
-
-        /* NOTE:
-         * Pay attention that `start[-value]` can lead to strange undefined behavior
-         * notably if `value` is unsigned, resulting in a large positive `-value`.
-         */
-        /* catch up */
-        if (STORED_IS_OFFSET(offcode)) {
-            if (dictMode == ZSTD_noDict) {
-                while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
-                     && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
-                    { start--; matchLength++; }
-            }
-            if (isDxS) {
-                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
-                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
-                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
-                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            }
-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
-        }
-        /* store sequence */
-_storeSequence:
-        {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
-            anchor = ip = start + matchLength;
-        }
-
-        /* check immediate repcode */
-        if (isDxS) {
-            while (ip <= ilimit) {
-                U32 const current2 = (U32)(ip-base);
-                U32 const repIndex = current2 - offset_2;
-                const BYTE* repMatch = repIndex < prefixLowestIndex ?
-                        dictBase - dictIndexDelta + repIndex :
-                        base + repIndex;
-                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
-                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
-                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
-                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
-                    offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-                    ip += matchLength;
-                    anchor = ip;
-                    continue;
-                }
-                break;
-            }
-        }
-
-        if (dictMode == ZSTD_noDict) {
-            while ( ((ip <= ilimit) & (offset_2>0))
-                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
-                /* store sequence */
-                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-                ip += matchLength;
-                anchor = ip;
-                continue;   /* faster when present ... (?) */
-    }   }   }
-
-    /* Save reps for next block */
-    rep[0] = offset_1 ? offset_1 : savedOffset;
-    rep[1] = offset_2 ? offset_2 : savedOffset;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-
-size_t ZSTD_compressBlock_btlazy2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_lazy2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_lazy(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_greedy(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_lazy_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_greedy_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
-}
-
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
-}
-
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
-}
-
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
-}
-
-/* Row-based matchfinder */
-size_t ZSTD_compressBlock_lazy2_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_lazy_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_greedy_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
-}
-
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
-}
-
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
-}
-
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
-}
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_extDict_generic(
-                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
-                        U32 rep[ZSTD_REP_NUM],
-                        const void* src, size_t srcSize,
-                        const searchMethod_e searchMethod, const U32 depth)
-{
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
-    const BYTE* const base = ms->window.base;
-    const U32 dictLimit = ms->window.dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const BYTE* const dictEnd  = dictBase + dictLimit;
-    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
-    const U32 windowLog = ms->cParams.windowLog;
-    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
-
-    searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax;
-    U32 offset_1 = rep[0], offset_2 = rep[1];
-
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
-
-    /* init */
-    ip += (ip == prefixStart);
-    if (searchMethod == search_rowHash) {
-        ZSTD_row_fillHashCache(ms, base, rowLog,
-                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
-                               ms->nextToUpdate, ilimit);
-    }
-
-    /* Match Loop */
-#if defined(__GNUC__) && defined(__x86_64__)
-    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
-     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
-     */
-    __asm__(".p2align 5");
-#endif
-    while (ip < ilimit) {
-        size_t matchLength=0;
-        size_t offcode=STORE_REPCODE_1;
-        const BYTE* start=ip+1;
-        U32 curr = (U32)(ip-base);
-
-        /* check repCode */
-        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
-            const U32 repIndex = (U32)(curr+1 - offset_1);
-            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-            const BYTE* const repMatch = repBase + repIndex;
-            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
-               & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
-            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
-                /* repcode detected we should take it */
-                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                if (depth==0) goto _storeSequence;
-        }   }
-
-        /* first search (depth 0) */
-        {   size_t offsetFound = 999999999;
-            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
-            if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offcode=offsetFound;
-        }
-
-        if (matchLength < 4) {
-            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
-            continue;
-        }
-
-        /* let's try to find a better solution */
-        if (depth>=1)
-        while (ip<ilimit) {
-            ip ++;
-            curr++;
-            /* check repCode */
-            if (offcode) {
-                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
-                const U32 repIndex = (U32)(curr - offset_1);
-                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-                const BYTE* const repMatch = repBase + repIndex;
-                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
-                   & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
-                if (MEM_read32(ip) == MEM_read32(repMatch)) {
-                    /* repcode detected */
-                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                    int const gain2 = (int)(repLength * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                    if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
-            }   }
-
-            /* search match, depth 1 */
-            {   size_t offset2=999999999;
-                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
-                if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offcode = offset2, start = ip;
-                    continue;   /* search a better one */
-            }   }
-
-            /* let's find an even better one */
-            if ((depth==2) && (ip<ilimit)) {
-                ip ++;
-                curr++;
-                /* check repCode */
-                if (offcode) {
-                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
-                    const U32 repIndex = (U32)(curr - offset_1);
-                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-                    const BYTE* const repMatch = repBase + repIndex;
-                    if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
-                       & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
-                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
-                        /* repcode detected */
-                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                        int const gain2 = (int)(repLength * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
-                        if ((repLength >= 4) && (gain2 > gain1))
-                            matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
-                }   }
-
-                /* search match, depth 2 */
-                {   size_t offset2=999999999;
-                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
-                    if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offcode = offset2, start = ip;
-                        continue;
-            }   }   }
-            break;  /* nothing found : store previous solution */
-        }
-
-        /* catch up */
-        if (STORED_IS_OFFSET(offcode)) {
-            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
-            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
-            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
-            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
-        }
-
-        /* store sequence */
-_storeSequence:
-        {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
-            anchor = ip = start + matchLength;
-        }
-
-        /* check immediate repcode */
-        while (ip <= ilimit) {
-            const U32 repCurrent = (U32)(ip-base);
-            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
-            const U32 repIndex = repCurrent - offset_2;
-            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
-            const BYTE* const repMatch = repBase + repIndex;
-            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
-               & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
-            if (MEM_read32(ip) == MEM_read32(repMatch)) {
-                /* repcode detected we should take it */
-                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
-                ip += matchLength;
-                anchor = ip;
-                continue;   /* faster when present ... (?) */
-            }
-            break;
-    }   }
-
-    /* Save reps for next block */
-    rep[0] = offset_1;
-    rep[1] = offset_2;
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-
-size_t ZSTD_compressBlock_greedy_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
-}
-
-size_t ZSTD_compressBlock_lazy_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
-}
-
-size_t ZSTD_compressBlock_lazy2_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
-}
-
-size_t ZSTD_compressBlock_btlazy2_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
-}
-
-size_t ZSTD_compressBlock_greedy_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
-}
-
-size_t ZSTD_compressBlock_lazy_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
-}
-
-size_t ZSTD_compressBlock_lazy2_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-
-{
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
-}
diff --git a/dep/zstd/lib/compress/zstd_lazy.h b/dep/zstd/lib/compress/zstd_lazy.h
deleted file mode 100644
index 150f7b390..000000000
--- a/dep/zstd/lib/compress/zstd_lazy.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_LAZY_H
-#define ZSTD_LAZY_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include "zstd_compress_internal.h"
-
-/**
- * Dedicated Dictionary Search Structure bucket log. In the
- * ZSTD_dedicatedDictSearch mode, the hashTable has
- * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
- * one.
- */
-#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
-void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
-
-void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
-
-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
-
-size_t ZSTD_compressBlock_btlazy2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict_row(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btlazy2_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-        
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_LAZY_H */
diff --git a/dep/zstd/lib/compress/zstd_ldm.c b/dep/zstd/lib/compress/zstd_ldm.c
deleted file mode 100644
index f662b2546..000000000
--- a/dep/zstd/lib/compress/zstd_ldm.c
+++ /dev/null
@@ -1,724 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_ldm.h"
-
-#include "../common/debug.h"
-#include "../common/xxhash.h"
-#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
-#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
-#include "zstd_ldm_geartab.h"
-
-#define LDM_BUCKET_SIZE_LOG 3
-#define LDM_MIN_MATCH_LENGTH 64
-#define LDM_HASH_RLOG 7
-
-typedef struct {
-    U64 rolling;
-    U64 stopMask;
-} ldmRollingHashState_t;
-
-/** ZSTD_ldm_gear_init():
- *
- * Initializes the rolling hash state such that it will honor the
- * settings in params. */
-static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
-{
-    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
-    unsigned hashRateLog = params->hashRateLog;
-
-    state->rolling = ~(U32)0;
-
-    /* The choice of the splitting criterion is subject to two conditions:
-     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
-     *   2. ideally, it has to depend on a window of minMatchLength bytes.
-     *
-     * In the gear hash algorithm, bit n depends on the last n bytes;
-     * so in order to obtain a good quality splitting criterion it is
-     * preferable to use bits with high weight.
-     *
-     * To match condition 1 we use a mask with hashRateLog bits set
-     * and, because of the previous remark, we make sure these bits
-     * have the highest possible weight while still respecting
-     * condition 2.
-     */
-    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
-        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
-    } else {
-        /* In this degenerate case we simply honor the hash rate. */
-        state->stopMask = ((U64)1 << hashRateLog) - 1;
-    }
-}
-
-/** ZSTD_ldm_gear_reset()
- * Feeds [data, data + minMatchLength) into the hash without registering any
- * splits. This effectively resets the hash state. This is used when skipping
- * over data, either at the beginning of a block, or skipping sections.
- */
-static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
-                                BYTE const* data, size_t minMatchLength)
-{
-    U64 hash = state->rolling;
-    size_t n = 0;
-
-#define GEAR_ITER_ONCE() do {                                  \
-        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
-        n += 1;                                                \
-    } while (0)
-    while (n + 3 < minMatchLength) {
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-    }
-    while (n < minMatchLength) {
-        GEAR_ITER_ONCE();
-    }
-#undef GEAR_ITER_ONCE
-}
-
-/** ZSTD_ldm_gear_feed():
- *
- * Registers in the splits array all the split points found in the first
- * size bytes following the data pointer. This function terminates when
- * either all the data has been processed or LDM_BATCH_SIZE splits are
- * present in the splits array.
- *
- * Precondition: The splits array must not be full.
- * Returns: The number of bytes processed. */
-static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
-                                 BYTE const* data, size_t size,
-                                 size_t* splits, unsigned* numSplits)
-{
-    size_t n;
-    U64 hash, mask;
-
-    hash = state->rolling;
-    mask = state->stopMask;
-    n = 0;
-
-#define GEAR_ITER_ONCE() do { \
-        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
-        n += 1; \
-        if (UNLIKELY((hash & mask) == 0)) { \
-            splits[*numSplits] = n; \
-            *numSplits += 1; \
-            if (*numSplits == LDM_BATCH_SIZE) \
-                goto done; \
-        } \
-    } while (0)
-
-    while (n + 3 < size) {
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-        GEAR_ITER_ONCE();
-    }
-    while (n < size) {
-        GEAR_ITER_ONCE();
-    }
-
-#undef GEAR_ITER_ONCE
-
-done:
-    state->rolling = hash;
-    return n;
-}
-
-void ZSTD_ldm_adjustParameters(ldmParams_t* params,
-                               ZSTD_compressionParameters const* cParams)
-{
-    params->windowLog = cParams->windowLog;
-    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
-    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
-    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
-    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    if (params->hashLog == 0) {
-        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
-        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
-    }
-    if (params->hashRateLog == 0) {
-        params->hashRateLog = params->windowLog < params->hashLog
-                                   ? 0
-                                   : params->windowLog - params->hashLog;
-    }
-    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
-}
-
-size_t ZSTD_ldm_getTableSize(ldmParams_t params)
-{
-    size_t const ldmHSize = ((size_t)1) << params.hashLog;
-    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
-                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
-    return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
-}
-
-size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
-{
-    return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
-}
-
-/** ZSTD_ldm_getBucket() :
- *  Returns a pointer to the start of the bucket associated with hash. */
-static ldmEntry_t* ZSTD_ldm_getBucket(
-        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
-{
-    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
-}
-
-/** ZSTD_ldm_insertEntry() :
- *  Insert the entry with corresponding hash into the hash table */
-static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
-                                 size_t const hash, const ldmEntry_t entry,
-                                 ldmParams_t const ldmParams)
-{
-    BYTE* const pOffset = ldmState->bucketOffsets + hash;
-    unsigned const offset = *pOffset;
-
-    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
-    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
-
-}
-
-/** ZSTD_ldm_countBackwardsMatch() :
- *  Returns the number of bytes that match backwards before pIn and pMatch.
- *
- *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
-static size_t ZSTD_ldm_countBackwardsMatch(
-            const BYTE* pIn, const BYTE* pAnchor,
-            const BYTE* pMatch, const BYTE* pMatchBase)
-{
-    size_t matchLength = 0;
-    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
-        pIn--;
-        pMatch--;
-        matchLength++;
-    }
-    return matchLength;
-}
-
-/** ZSTD_ldm_countBackwardsMatch_2segments() :
- *  Returns the number of bytes that match backwards from pMatch,
- *  even with the backwards match spanning 2 different segments.
- *
- *  On reaching `pMatchBase`, start counting from mEnd */
-static size_t ZSTD_ldm_countBackwardsMatch_2segments(
-                    const BYTE* pIn, const BYTE* pAnchor,
-                    const BYTE* pMatch, const BYTE* pMatchBase,
-                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
-{
-    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
-    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
-        /* If backwards match is entirely in the extDict or prefix, immediately return */
-        return matchLength;
-    }
-    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
-    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
-    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
-    return matchLength;
-}
-
-/** ZSTD_ldm_fillFastTables() :
- *
- *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
- *  This is similar to ZSTD_loadDictionaryContent.
- *
- *  The tables for the other strategies are filled within their
- *  block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
-                                      void const* end)
-{
-    const BYTE* const iend = (const BYTE*)end;
-
-    switch(ms->cParams.strategy)
-    {
-    case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
-        break;
-
-    case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
-        break;
-
-    case ZSTD_greedy:
-    case ZSTD_lazy:
-    case ZSTD_lazy2:
-    case ZSTD_btlazy2:
-    case ZSTD_btopt:
-    case ZSTD_btultra:
-    case ZSTD_btultra2:
-        break;
-    default:
-        assert(0);  /* not possible : not a valid strategy id */
-    }
-
-    return 0;
-}
-
-void ZSTD_ldm_fillHashTable(
-            ldmState_t* ldmState, const BYTE* ip,
-            const BYTE* iend, ldmParams_t const* params)
-{
-    U32 const minMatchLength = params->minMatchLength;
-    U32 const hBits = params->hashLog - params->bucketSizeLog;
-    BYTE const* const base = ldmState->window.base;
-    BYTE const* const istart = ip;
-    ldmRollingHashState_t hashState;
-    size_t* const splits = ldmState->splitIndices;
-    unsigned numSplits;
-
-    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
-
-    ZSTD_ldm_gear_init(&hashState, params);
-    while (ip < iend) {
-        size_t hashed;
-        unsigned n;
-
-        numSplits = 0;
-        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
-
-        for (n = 0; n < numSplits; n++) {
-            if (ip + splits[n] >= istart + minMatchLength) {
-                BYTE const* const split = ip + splits[n] - minMatchLength;
-                U64 const xxhash = XXH64(split, minMatchLength, 0);
-                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
-                ldmEntry_t entry;
-
-                entry.offset = (U32)(split - base);
-                entry.checksum = (U32)(xxhash >> 32);
-                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
-            }
-        }
-
-        ip += hashed;
-    }
-}
-
-
-/** ZSTD_ldm_limitTableUpdate() :
- *
- *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
- *  if it is far way
- *  (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
-{
-    U32 const curr = (U32)(anchor - ms->window.base);
-    if (curr > ms->nextToUpdate + 1024) {
-        ms->nextToUpdate =
-            curr - MIN(512, curr - ms->nextToUpdate - 1024);
-    }
-}
-
-static size_t ZSTD_ldm_generateSequences_internal(
-        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
-        ldmParams_t const* params, void const* src, size_t srcSize)
-{
-    /* LDM parameters */
-    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
-    U32 const minMatchLength = params->minMatchLength;
-    U32 const entsPerBucket = 1U << params->bucketSizeLog;
-    U32 const hBits = params->hashLog - params->bucketSizeLog;
-    /* Prefix and extDict parameters */
-    U32 const dictLimit = ldmState->window.dictLimit;
-    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
-    BYTE const* const base = ldmState->window.base;
-    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
-    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
-    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
-    BYTE const* const lowPrefixPtr = base + dictLimit;
-    /* Input bounds */
-    BYTE const* const istart = (BYTE const*)src;
-    BYTE const* const iend = istart + srcSize;
-    BYTE const* const ilimit = iend - HASH_READ_SIZE;
-    /* Input positions */
-    BYTE const* anchor = istart;
-    BYTE const* ip = istart;
-    /* Rolling hash state */
-    ldmRollingHashState_t hashState;
-    /* Arrays for staged-processing */
-    size_t* const splits = ldmState->splitIndices;
-    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
-    unsigned numSplits;
-
-    if (srcSize < minMatchLength)
-        return iend - anchor;
-
-    /* Initialize the rolling hash state with the first minMatchLength bytes */
-    ZSTD_ldm_gear_init(&hashState, params);
-    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
-    ip += minMatchLength;
-
-    while (ip < ilimit) {
-        size_t hashed;
-        unsigned n;
-
-        numSplits = 0;
-        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
-                                    splits, &numSplits);
-
-        for (n = 0; n < numSplits; n++) {
-            BYTE const* const split = ip + splits[n] - minMatchLength;
-            U64 const xxhash = XXH64(split, minMatchLength, 0);
-            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
-
-            candidates[n].split = split;
-            candidates[n].hash = hash;
-            candidates[n].checksum = (U32)(xxhash >> 32);
-            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
-            PREFETCH_L1(candidates[n].bucket);
-        }
-
-        for (n = 0; n < numSplits; n++) {
-            size_t forwardMatchLength = 0, backwardMatchLength = 0,
-                   bestMatchLength = 0, mLength;
-            U32 offset;
-            BYTE const* const split = candidates[n].split;
-            U32 const checksum = candidates[n].checksum;
-            U32 const hash = candidates[n].hash;
-            ldmEntry_t* const bucket = candidates[n].bucket;
-            ldmEntry_t const* cur;
-            ldmEntry_t const* bestEntry = NULL;
-            ldmEntry_t newEntry;
-
-            newEntry.offset = (U32)(split - base);
-            newEntry.checksum = checksum;
-
-            /* If a split point would generate a sequence overlapping with
-             * the previous one, we merely register it in the hash table and
-             * move on */
-            if (split < anchor) {
-                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
-                continue;
-            }
-
-            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-                if (extDict) {
-                    BYTE const* const curMatchBase =
-                        cur->offset < dictLimit ? dictBase : base;
-                    BYTE const* const pMatch = curMatchBase + cur->offset;
-                    BYTE const* const matchEnd =
-                        cur->offset < dictLimit ? dictEnd : iend;
-                    BYTE const* const lowMatchPtr =
-                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                    curForwardMatchLength =
-                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
-                    if (curForwardMatchLength < minMatchLength) {
-                        continue;
-                    }
-                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
-                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
-                } else { /* !extDict */
-                    BYTE const* const pMatch = base + cur->offset;
-                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
-                    if (curForwardMatchLength < minMatchLength) {
-                        continue;
-                    }
-                    curBackwardMatchLength =
-                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
-                }
-                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
-                }
-            }
-
-            /* No match found -- insert an entry into the hash table
-             * and process the next candidate match */
-            if (bestEntry == NULL) {
-                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
-                continue;
-            }
-
-            /* Match found */
-            offset = (U32)(split - base) - bestEntry->offset;
-            mLength = forwardMatchLength + backwardMatchLength;
-            {
-                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
-
-                /* Out of sequence storage */
-                if (rawSeqStore->size == rawSeqStore->capacity)
-                    return ERROR(dstSize_tooSmall);
-                seq->litLength = (U32)(split - backwardMatchLength - anchor);
-                seq->matchLength = (U32)mLength;
-                seq->offset = offset;
-                rawSeqStore->size++;
-            }
-
-            /* Insert the current entry into the hash table --- it must be
-             * done after the previous block to avoid clobbering bestEntry */
-            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
-
-            anchor = split + forwardMatchLength;
-
-            /* If we find a match that ends after the data that we've hashed
-             * then we have a repeating, overlapping, pattern. E.g. all zeros.
-             * If one repetition of the pattern matches our `stopMask` then all
-             * repetitions will. We don't need to insert them all into out table,
-             * only the first one. So skip over overlapping matches.
-             * This is a major speed boost (20x) for compressing a single byte
-             * repeated, when that byte ends up in the table.
-             */
-            if (anchor > ip + hashed) {
-                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
-                /* Continue the outer loop at anchor (ip + hashed == anchor). */
-                ip = anchor - hashed;
-                break;
-            }
-        }
-
-        ip += hashed;
-    }
-
-    return iend - anchor;
-}
-
-/*! ZSTD_ldm_reduceTable() :
- *  reduce table indexes by `reducerValue` */
-static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
-                                 U32 const reducerValue)
-{
-    U32 u;
-    for (u = 0; u < size; u++) {
-        if (table[u].offset < reducerValue) table[u].offset = 0;
-        else table[u].offset -= reducerValue;
-    }
-}
-
-size_t ZSTD_ldm_generateSequences(
-        ldmState_t* ldmState, rawSeqStore_t* sequences,
-        ldmParams_t const* params, void const* src, size_t srcSize)
-{
-    U32 const maxDist = 1U << params->windowLog;
-    BYTE const* const istart = (BYTE const*)src;
-    BYTE const* const iend = istart + srcSize;
-    size_t const kMaxChunkSize = 1 << 20;
-    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
-    size_t chunk;
-    size_t leftoverSize = 0;
-
-    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
-    /* Check that ZSTD_window_update() has been called for this chunk prior
-     * to passing it to this function.
-     */
-    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
-    /* The input could be very large (in zstdmt), so it must be broken up into
-     * chunks to enforce the maximum distance and handle overflow correction.
-     */
-    assert(sequences->pos <= sequences->size);
-    assert(sequences->size <= sequences->capacity);
-    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
-        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
-        size_t const remaining = (size_t)(iend - chunkStart);
-        BYTE const *const chunkEnd =
-            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
-        size_t const chunkSize = chunkEnd - chunkStart;
-        size_t newLeftoverSize;
-        size_t const prevSize = sequences->size;
-
-        assert(chunkStart < iend);
-        /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
-            U32 const ldmHSize = 1U << params->hashLog;
-            U32 const correction = ZSTD_window_correctOverflow(
-                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
-            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
-            /* invalidate dictionaries on overflow correction */
-            ldmState->loadedDictEnd = 0;
-        }
-        /* 2. We enforce the maximum offset allowed.
-         *
-         * kMaxChunkSize should be small enough that we don't lose too much of
-         * the window through early invalidation.
-         * TODO: * Test the chunk size.
-         *       * Try invalidation after the sequence generation and test the
-         *         the offset against maxDist directly.
-         *
-         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
-         * that any offset used is valid at the END of the sequence, since it may
-         * be split into two sequences. This condition holds when using
-         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
-         * against maxDist directly, we'll have to carefully handle that case.
-         */
-        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
-        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
-        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
-            ldmState, sequences, params, chunkStart, chunkSize);
-        if (ZSTD_isError(newLeftoverSize))
-            return newLeftoverSize;
-        /* 4. We add the leftover literals from previous iterations to the first
-         *    newly generated sequence, or add the `newLeftoverSize` if none are
-         *    generated.
-         */
-        /* Prepend the leftover literals from the last call */
-        if (prevSize < sequences->size) {
-            sequences->seq[prevSize].litLength += (U32)leftoverSize;
-            leftoverSize = newLeftoverSize;
-        } else {
-            assert(newLeftoverSize == chunkSize);
-            leftoverSize += chunkSize;
-        }
-    }
-    return 0;
-}
-
-void
-ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
-{
-    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
-        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
-        if (srcSize <= seq->litLength) {
-            /* Skip past srcSize literals */
-            seq->litLength -= (U32)srcSize;
-            return;
-        }
-        srcSize -= seq->litLength;
-        seq->litLength = 0;
-        if (srcSize < seq->matchLength) {
-            /* Skip past the first srcSize of the match */
-            seq->matchLength -= (U32)srcSize;
-            if (seq->matchLength < minMatch) {
-                /* The match is too short, omit it */
-                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
-                    seq[1].litLength += seq[0].matchLength;
-                }
-                rawSeqStore->pos++;
-            }
-            return;
-        }
-        srcSize -= seq->matchLength;
-        seq->matchLength = 0;
-        rawSeqStore->pos++;
-    }
-}
-
-/**
- * If the sequence length is longer than remaining then the sequence is split
- * between this block and the next.
- *
- * Returns the current sequence to handle, or if the rest of the block should
- * be literals, it returns a sequence with offset == 0.
- */
-static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
-                                 U32 const remaining, U32 const minMatch)
-{
-    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
-    assert(sequence.offset > 0);
-    /* Likely: No partial sequence */
-    if (remaining >= sequence.litLength + sequence.matchLength) {
-        rawSeqStore->pos++;
-        return sequence;
-    }
-    /* Cut the sequence short (offset == 0 ==> rest is literals). */
-    if (remaining <= sequence.litLength) {
-        sequence.offset = 0;
-    } else if (remaining < sequence.litLength + sequence.matchLength) {
-        sequence.matchLength = remaining - sequence.litLength;
-        if (sequence.matchLength < minMatch) {
-            sequence.offset = 0;
-        }
-    }
-    /* Skip past `remaining` bytes for the future sequences. */
-    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
-    return sequence;
-}
-
-void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
-    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
-    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
-        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
-        if (currPos >= currSeq.litLength + currSeq.matchLength) {
-            currPos -= currSeq.litLength + currSeq.matchLength;
-            rawSeqStore->pos++;
-        } else {
-            rawSeqStore->posInSequence = currPos;
-            break;
-        }
-    }
-    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
-        rawSeqStore->posInSequence = 0;
-    }
-}
-
-size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_paramSwitch_e useRowMatchFinder,
-    void const* src, size_t srcSize)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    unsigned const minMatch = cParams->minMatch;
-    ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
-    /* Input bounds */
-    BYTE const* const istart = (BYTE const*)src;
-    BYTE const* const iend = istart + srcSize;
-    /* Input positions */
-    BYTE const* ip = istart;
-
-    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
-    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
-    if (cParams->strategy >= ZSTD_btopt) {
-        size_t lastLLSize;
-        ms->ldmSeqStore = rawSeqStore;
-        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
-        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
-        return lastLLSize;
-    }
-
-    assert(rawSeqStore->pos <= rawSeqStore->size);
-    assert(rawSeqStore->size <= rawSeqStore->capacity);
-    /* Loop through each sequence and apply the block compressor to the literals */
-    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
-        /* maybeSplitSequence updates rawSeqStore->pos */
-        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
-                                                   (U32)(iend - ip), minMatch);
-        int i;
-        /* End signal */
-        if (sequence.offset == 0)
-            break;
-
-        assert(ip + sequence.litLength + sequence.matchLength <= iend);
-
-        /* Fill tables for block compressor */
-        ZSTD_ldm_limitTableUpdate(ms, ip);
-        ZSTD_ldm_fillFastTables(ms, ip);
-        /* Run the block compressor */
-        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
-        {
-            size_t const newLitLength =
-                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
-            ip += sequence.litLength;
-            /* Update the repcodes */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                rep[i] = rep[i-1];
-            rep[0] = sequence.offset;
-            /* Store the sequence */
-            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
-                          STORE_OFFSET(sequence.offset),
-                          sequence.matchLength);
-            ip += sequence.matchLength;
-        }
-    }
-    /* Fill the tables for the block compressor */
-    ZSTD_ldm_limitTableUpdate(ms, ip);
-    ZSTD_ldm_fillFastTables(ms, ip);
-    /* Compress the last literals */
-    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
-}
diff --git a/dep/zstd/lib/compress/zstd_ldm.h b/dep/zstd/lib/compress/zstd_ldm.h
deleted file mode 100644
index 4e68dbf52..000000000
--- a/dep/zstd/lib/compress/zstd_ldm.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_LDM_H
-#define ZSTD_LDM_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
-#include "../zstd.h"   /* ZSTD_CCtx, size_t */
-
-/*-*************************************
-*  Long distance matching
-***************************************/
-
-#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
-
-void ZSTD_ldm_fillHashTable(
-            ldmState_t* state, const BYTE* ip,
-            const BYTE* iend, ldmParams_t const* params);
-
-/**
- * ZSTD_ldm_generateSequences():
- *
- * Generates the sequences using the long distance match finder.
- * Generates long range matching sequences in `sequences`, which parse a prefix
- * of the source. `sequences` must be large enough to store every sequence,
- * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
- * @returns 0 or an error code.
- *
- * NOTE: The user must have called ZSTD_window_update() for all of the input
- * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
- * NOTE: This function returns an error if it runs out of space to store
- *       sequences.
- */
-size_t ZSTD_ldm_generateSequences(
-            ldmState_t* ldms, rawSeqStore_t* sequences,
-            ldmParams_t const* params, void const* src, size_t srcSize);
-
-/**
- * ZSTD_ldm_blockCompress():
- *
- * Compresses a block using the predefined sequences, along with a secondary
- * block compressor. The literals section of every sequence is passed to the
- * secondary block compressor, and those sequences are interspersed with the
- * predefined sequences. Returns the length of the last literals.
- * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
- * `rawSeqStore.seq` may also be updated to split the last sequence between two
- * blocks.
- * @return The length of the last literals.
- *
- * NOTE: The source must be at most the maximum block size, but the predefined
- * sequences can be any size, and may be longer than the block. In the case that
- * they are longer than the block, the last sequences may need to be split into
- * two. We handle that case correctly, and update `rawSeqStore` appropriately.
- * NOTE: This function does not return any errors.
- */
-size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-            ZSTD_paramSwitch_e useRowMatchFinder,
-            void const* src, size_t srcSize);
-
-/**
- * ZSTD_ldm_skipSequences():
- *
- * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
- * Avoids emitting matches less than `minMatch` bytes.
- * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
- */
-void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
-    U32 const minMatch);
-
-/* ZSTD_ldm_skipRawSeqStoreBytes():
- * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
- * Not to be used in conjunction with ZSTD_ldm_skipSequences().
- * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
- */
-void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
-
-/** ZSTD_ldm_getTableSize() :
- *  Estimate the space needed for long distance matching tables or 0 if LDM is
- *  disabled.
- */
-size_t ZSTD_ldm_getTableSize(ldmParams_t params);
-
-/** ZSTD_ldm_getSeqSpace() :
- *  Return an upper bound on the number of sequences that can be produced by
- *  the long distance matcher, or 0 if LDM is disabled.
- */
-size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
-
-/** ZSTD_ldm_adjustParameters() :
- *  If the params->hashRateLog is not set, set it to its default value based on
- *  windowLog and params->hashLog.
- *
- *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
- *  params->hashLog if it is not).
- *
- *  Ensures that the minMatchLength >= targetLength during optimal parsing.
- */
-void ZSTD_ldm_adjustParameters(ldmParams_t* params,
-                               ZSTD_compressionParameters const* cParams);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_FAST_H */
diff --git a/dep/zstd/lib/compress/zstd_ldm_geartab.h b/dep/zstd/lib/compress/zstd_ldm_geartab.h
deleted file mode 100644
index 647f865be..000000000
--- a/dep/zstd/lib/compress/zstd_ldm_geartab.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_LDM_GEARTAB_H
-#define ZSTD_LDM_GEARTAB_H
-
-#include "../common/compiler.h" /* UNUSED_ATTR */
-#include "../common/mem.h"      /* U64 */
-
-static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
-    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
-    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
-    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
-    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
-    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
-    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
-    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
-    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
-    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
-    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
-    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
-    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
-    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
-    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
-    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
-    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
-    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
-    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
-    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
-    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
-    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
-    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
-    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
-    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
-    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
-    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
-    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
-    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
-    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
-    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
-    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
-    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
-    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
-    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
-    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
-    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
-    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
-    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
-    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
-    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
-    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
-    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
-    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
-    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
-    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
-    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
-    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
-    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
-    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
-    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
-    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
-    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
-    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
-    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
-    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
-    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
-    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
-    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
-    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
-    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
-    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
-    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
-    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
-    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
-    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
-    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
-    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
-    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
-    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
-    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
-    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
-    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
-    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
-    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
-    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
-    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
-    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
-    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
-    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
-    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
-    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
-    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
-    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
-    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
-    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
-    0x2b4da14f2613d8f4
-};
-
-#endif /* ZSTD_LDM_GEARTAB_H */
diff --git a/dep/zstd/lib/compress/zstd_opt.c b/dep/zstd/lib/compress/zstd_opt.c
deleted file mode 100644
index 1b1ddad42..000000000
--- a/dep/zstd/lib/compress/zstd_opt.c
+++ /dev/null
@@ -1,1446 +0,0 @@
-/*
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_compress_internal.h"
-#include "hist.h"
-#include "zstd_opt.h"
-
-
-#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
-#define ZSTD_MAX_PRICE     (1<<30)
-
-#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
-
-
-/*-*************************************
-*  Price functions for optimal parser
-***************************************/
-
-#if 0    /* approximation at bit level (for tests) */
-#  define BITCOST_ACCURACY 0
-#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
-#elif 0  /* fractional bit accuracy (for tests) */
-#  define BITCOST_ACCURACY 8
-#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
-#else    /* opt==approx, ultra==accurate */
-#  define BITCOST_ACCURACY 8
-#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
-#endif
-
-MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
-{
-    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
-}
-
-MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
-{
-    U32 const stat = rawStat + 1;
-    U32 const hb = ZSTD_highbit32(stat);
-    U32 const BWeight = hb * BITCOST_MULTIPLIER;
-    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
-    U32 const weight = BWeight + FWeight;
-    assert(hb + BITCOST_ACCURACY < 31);
-    return weight;
-}
-
-#if (DEBUGLEVEL>=2)
-/* debugging function,
- * @return price in bytes as fractional value
- * for debug messages only */
-MEM_STATIC double ZSTD_fCost(U32 price)
-{
-    return (double)price / (BITCOST_MULTIPLIER*8);
-}
-#endif
-
-static int ZSTD_compressedLiterals(optState_t const* const optPtr)
-{
-    return optPtr->literalCompressionMode != ZSTD_ps_disable;
-}
-
-static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
-{
-    if (ZSTD_compressedLiterals(optPtr))
-        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
-    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
-    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
-    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
-}
-
-
-static U32 sum_u32(const unsigned table[], size_t nbElts)
-{
-    size_t n;
-    U32 total = 0;
-    for (n=0; n<nbElts; n++) {
-        total += table[n];
-    }
-    return total;
-}
-
-static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
-{
-    U32 s, sum=0;
-    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
-    assert(shift < 30);
-    for (s=0; s<lastEltIndex+1; s++) {
-        table[s] = 1 + (table[s] >> shift);
-        sum += table[s];
-    }
-    return sum;
-}
-
-/* ZSTD_scaleStats() :
- * reduce all elements in table is sum too large
- * return the resulting sum of elements */
-static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
-{
-    U32 const prevsum = sum_u32(table, lastEltIndex+1);
-    U32 const factor = prevsum >> logTarget;
-    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
-    assert(logTarget < 30);
-    if (factor <= 1) return prevsum;
-    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
-}
-
-/* ZSTD_rescaleFreqs() :
- * if first block (detected by optPtr->litLengthSum == 0) : init statistics
- *    take hints from dictionary if there is one
- *    and init from zero if there is none,
- *    using src for literals stats, and baseline stats for sequence symbols
- * otherwise downscale existing stats, to be used as seed for next block.
- */
-static void
-ZSTD_rescaleFreqs(optState_t* const optPtr,
-            const BYTE* const src, size_t const srcSize,
-                  int const optLevel)
-{
-    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
-    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
-    optPtr->priceType = zop_dynamic;
-
-    if (optPtr->litLengthSum == 0) {  /* first block : init */
-        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
-            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
-            optPtr->priceType = zop_predef;
-        }
-
-        assert(optPtr->symbolCosts != NULL);
-        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
-            /* huffman table presumed generated by dictionary */
-            optPtr->priceType = zop_dynamic;
-
-            if (compressedLiterals) {
-                unsigned lit;
-                assert(optPtr->litFreq != NULL);
-                optPtr->litSum = 0;
-                for (lit=0; lit<=MaxLit; lit++) {
-                    U32 const scaleLog = 11;   /* scale to 2K */
-                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
-                    assert(bitCost <= scaleLog);
-                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
-                    optPtr->litSum += optPtr->litFreq[lit];
-            }   }
-
-            {   unsigned ll;
-                FSE_CState_t llstate;
-                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
-                optPtr->litLengthSum = 0;
-                for (ll=0; ll<=MaxLL; ll++) {
-                    U32 const scaleLog = 10;   /* scale to 1K */
-                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
-                    assert(bitCost < scaleLog);
-                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
-                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
-            }   }
-
-            {   unsigned ml;
-                FSE_CState_t mlstate;
-                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
-                optPtr->matchLengthSum = 0;
-                for (ml=0; ml<=MaxML; ml++) {
-                    U32 const scaleLog = 10;
-                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
-                    assert(bitCost < scaleLog);
-                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
-                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
-            }   }
-
-            {   unsigned of;
-                FSE_CState_t ofstate;
-                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
-                optPtr->offCodeSum = 0;
-                for (of=0; of<=MaxOff; of++) {
-                    U32 const scaleLog = 10;
-                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
-                    assert(bitCost < scaleLog);
-                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
-                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
-            }   }
-
-        } else {  /* not a dictionary */
-
-            assert(optPtr->litFreq != NULL);
-            if (compressedLiterals) {
-                unsigned lit = MaxLit;
-                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
-            }
-
-            {   unsigned const baseLLfreqs[MaxLL+1] = {
-                    4, 2, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1
-                };
-                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
-                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
-            }
-
-            {   unsigned ml;
-                for (ml=0; ml<=MaxML; ml++)
-                    optPtr->matchLengthFreq[ml] = 1;
-            }
-            optPtr->matchLengthSum = MaxML+1;
-
-            {   unsigned const baseOFCfreqs[MaxOff+1] = {
-                    6, 2, 1, 1, 2, 3, 4, 4,
-                    4, 3, 2, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1,
-                    1, 1, 1, 1, 1, 1, 1, 1
-                };
-                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
-                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
-            }
-
-
-        }
-
-    } else {   /* new block : re-use previous statistics, scaled down */
-
-        if (compressedLiterals)
-            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
-        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
-        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
-        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
-    }
-
-    ZSTD_setBasePrices(optPtr, optLevel);
-}
-
-/* ZSTD_rawLiteralsCost() :
- * price of literals (only) in specified segment (which length can be 0).
- * does not include price of literalLength symbol */
-static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                const optState_t* const optPtr,
-                                int optLevel)
-{
-    if (litLength == 0) return 0;
-
-    if (!ZSTD_compressedLiterals(optPtr))
-        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
-
-    if (optPtr->priceType == zop_predef)
-        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
-
-    /* dynamic statistics */
-    {   U32 price = litLength * optPtr->litSumBasePrice;
-        U32 u;
-        for (u=0; u < litLength; u++) {
-            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
-            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
-        }
-        return price;
-    }
-}
-
-/* ZSTD_litLengthPrice() :
- * cost of literalLength symbol */
-static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
-{
-    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
-    if (optPtr->priceType == zop_predef)
-        return WEIGHT(litLength, optLevel);
-    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
-     * because it isn't representable in the zstd format. So instead just
-     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
-     * would be all literals.
-     */
-    if (litLength == ZSTD_BLOCKSIZE_MAX)
-        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
-
-    /* dynamic statistics */
-    {   U32 const llCode = ZSTD_LLcode(litLength);
-        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
-             + optPtr->litLengthSumBasePrice
-             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
-    }
-}
-
-/* ZSTD_getMatchPrice() :
- * Provides the cost of the match part (offset + matchLength) of a sequence
- * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
- * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
- */
-FORCE_INLINE_TEMPLATE U32
-ZSTD_getMatchPrice(U32 const offcode,
-                   U32 const matchLength,
-             const optState_t* const optPtr,
-                   int const optLevel)
-{
-    U32 price;
-    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
-    U32 const mlBase = matchLength - MINMATCH;
-    assert(matchLength >= MINMATCH);
-
-    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
-        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
-
-    /* dynamic statistics */
-    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
-    if ((optLevel<2) /*static*/ && offCode >= 20)
-        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
-
-    /* match Length */
-    {   U32 const mlCode = ZSTD_MLcode(mlBase);
-        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
-    }
-
-    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
-
-    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
-    return price;
-}
-
-/* ZSTD_updateStats() :
- * assumption : literals + litLengtn <= iend */
-static void ZSTD_updateStats(optState_t* const optPtr,
-                             U32 litLength, const BYTE* literals,
-                             U32 offsetCode, U32 matchLength)
-{
-    /* literals */
-    if (ZSTD_compressedLiterals(optPtr)) {
-        U32 u;
-        for (u=0; u < litLength; u++)
-            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
-        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
-    }
-
-    /* literal Length */
-    {   U32 const llCode = ZSTD_LLcode(litLength);
-        optPtr->litLengthFreq[llCode]++;
-        optPtr->litLengthSum++;
-    }
-
-    /* offset code : expected to follow storeSeq() numeric representation */
-    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
-        assert(offCode <= MaxOff);
-        optPtr->offCodeFreq[offCode]++;
-        optPtr->offCodeSum++;
-    }
-
-    /* match Length */
-    {   U32 const mlBase = matchLength - MINMATCH;
-        U32 const mlCode = ZSTD_MLcode(mlBase);
-        optPtr->matchLengthFreq[mlCode]++;
-        optPtr->matchLengthSum++;
-    }
-}
-
-
-/* ZSTD_readMINMATCH() :
- * function safe only for comparisons
- * assumption : memPtr must be at least 4 bytes before end of buffer */
-MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
-{
-    switch (length)
-    {
-    default :
-    case 4 : return MEM_read32(memPtr);
-    case 3 : if (MEM_isLittleEndian())
-                return MEM_read32(memPtr)<<8;
-             else
-                return MEM_read32(memPtr)>>8;
-    }
-}
-
-
-/* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
-                                              U32* nextToUpdate3,
-                                              const BYTE* const ip)
-{
-    U32* const hashTable3 = ms->hashTable3;
-    U32 const hashLog3 = ms->hashLog3;
-    const BYTE* const base = ms->window.base;
-    U32 idx = *nextToUpdate3;
-    U32 const target = (U32)(ip - base);
-    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
-    assert(hashLog3 > 0);
-
-    while(idx < target) {
-        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
-        idx++;
-    }
-
-    *nextToUpdate3 = target;
-    return hashTable3[hash3];
-}
-
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
- * @param ip assumed <= iend-8 .
- * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
- * @return : nb of positions added */
-static U32 ZSTD_insertBt1(
-                const ZSTD_matchState_t* ms,
-                const BYTE* const ip, const BYTE* const iend,
-                U32 const target,
-                U32 const mls, const int extDict)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32*   const hashTable = ms->hashTable;
-    U32    const hashLog = cParams->hashLog;
-    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32*   const bt = ms->chainTable;
-    U32    const btLog  = cParams->chainLog - 1;
-    U32    const btMask = (1 << btLog) - 1;
-    U32 matchIndex = hashTable[h];
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const dictBase = ms->window.dictBase;
-    const U32 dictLimit = ms->window.dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    const BYTE* match;
-    const U32 curr = (U32)(ip-base);
-    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-    U32* smallerPtr = bt + 2*(curr&btMask);
-    U32* largerPtr  = smallerPtr + 1;
-    U32 dummy32;   /* to be nullified at the end */
-    /* windowLow is based on target because
-     * we only need positions that will be in the window at the end of the tree update.
-     */
-    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
-    U32 matchEndIdx = curr+8+1;
-    size_t bestLength = 8;
-    U32 nbCompares = 1U << cParams->searchLog;
-#ifdef ZSTD_C_PREDICT
-    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
-    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
-    predictedSmall += (predictedSmall>0);
-    predictedLarge += (predictedLarge>0);
-#endif /* ZSTD_C_PREDICT */
-
-    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
-
-    assert(curr <= target);
-    assert(ip <= iend-8);   /* required for h calculation */
-    hashTable[h] = curr;   /* Update Hash Table */
-
-    assert(windowLow > 0);
-    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
-        U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        assert(matchIndex < curr);
-
-#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
-        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
-        if (matchIndex == predictedSmall) {
-            /* no need to check length, result known */
-            *smallerPtr = matchIndex;
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
-            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
-            continue;
-        }
-        if (matchIndex == predictedLarge) {
-            *largerPtr = matchIndex;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
-            continue;
-        }
-#endif
-
-        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
-            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
-            match = base + matchIndex;
-            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
-        } else {
-            match = dictBase + matchIndex;
-            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
-            if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
-        }
-
-        if (matchLength > bestLength) {
-            bestLength = matchLength;
-            if (matchLength > matchEndIdx - matchIndex)
-                matchEndIdx = matchIndex + (U32)matchLength;
-        }
-
-        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
-            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
-        }
-
-        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
-            /* match is smaller than current */
-            *smallerPtr = matchIndex;             /* update smaller idx */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
-            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
-        } else {
-            /* match is larger than current */
-            *largerPtr = matchIndex;
-            commonLengthLarger = matchLength;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-    }   }
-
-    *smallerPtr = *largerPtr = 0;
-    {   U32 positions = 0;
-        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-        assert(matchEndIdx > curr + 8);
-        return MAX(positions, matchEndIdx - (curr + 8));
-    }
-}
-
-FORCE_INLINE_TEMPLATE
-void ZSTD_updateTree_internal(
-                ZSTD_matchState_t* ms,
-                const BYTE* const ip, const BYTE* const iend,
-                const U32 mls, const ZSTD_dictMode_e dictMode)
-{
-    const BYTE* const base = ms->window.base;
-    U32 const target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
-                idx, target, dictMode);
-
-    while(idx < target) {
-        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
-        assert(idx < (U32)(idx + forward));
-        idx += forward;
-    }
-    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
-    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
-    ms->nextToUpdate = target;
-}
-
-void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
-    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
-}
-
-FORCE_INLINE_TEMPLATE
-U32 ZSTD_insertBtAndGetAllMatches (
-                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
-                    ZSTD_matchState_t* ms,
-                    U32* nextToUpdate3,
-                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                    const U32 rep[ZSTD_REP_NUM],
-                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-                    const U32 lengthToBeat,
-                    U32 const mls /* template */)
-{
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    const BYTE* const base = ms->window.base;
-    U32 const curr = (U32)(ip-base);
-    U32 const hashLog = cParams->hashLog;
-    U32 const minMatch = (mls==3) ? 3 : 4;
-    U32* const hashTable = ms->hashTable;
-    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
-    U32 matchIndex  = hashTable[h];
-    U32* const bt   = ms->chainTable;
-    U32 const btLog = cParams->chainLog - 1;
-    U32 const btMask= (1U << btLog) - 1;
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const dictBase = ms->window.dictBase;
-    U32 const dictLimit = ms->window.dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const prefixStart = base + dictLimit;
-    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
-    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
-    U32 const matchLow = windowLow ? windowLow : 1;
-    U32* smallerPtr = bt + 2*(curr&btMask);
-    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
-    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
-    U32 dummy32;   /* to be nullified at the end */
-    U32 mnum = 0;
-    U32 nbCompares = 1U << cParams->searchLog;
-
-    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
-    const ZSTD_compressionParameters* const dmsCParams =
-                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
-    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
-    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
-    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
-    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
-    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
-    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
-    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
-    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
-    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
-
-    size_t bestLength = lengthToBeat-1;
-    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
-
-    /* check repCode */
-    assert(ll0 <= 1);   /* necessarily 1 or 0 */
-    {   U32 const lastR = ZSTD_REP_NUM + ll0;
-        U32 repCode;
-        for (repCode = ll0; repCode < lastR; repCode++) {
-            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            U32 const repIndex = curr - repOffset;
-            U32 repLen = 0;
-            assert(curr >= dictLimit);
-            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
-                /* We must validate the repcode offset because when we're using a dictionary the
-                 * valid offset range shrinks when the dictionary goes out of bounds.
-                 */
-                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
-                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
-                }
-            } else {  /* repIndex < dictLimit || repIndex >= curr */
-                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
-                                             dmsBase + repIndex - dmsIndexDelta :
-                                             dictBase + repIndex;
-                assert(curr >= windowLow);
-                if ( dictMode == ZSTD_extDict
-                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
-                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
-                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
-                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
-                }
-                if (dictMode == ZSTD_dictMatchState
-                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
-                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
-                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
-            }   }
-            /* save longer solution */
-            if (repLen > bestLength) {
-                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
-                            repCode, ll0, repOffset, repLen);
-                bestLength = repLen;
-                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
-                matches[mnum].len = (U32)repLen;
-                mnum++;
-                if ( (repLen > sufficient_len)
-                   | (ip+repLen == iLimit) ) {  /* best possible */
-                    return mnum;
-    }   }   }   }
-
-    /* HC3 match finder */
-    if ((mls == 3) /*static*/ && (bestLength < mls)) {
-        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
-        if ((matchIndex3 >= matchLow)
-          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
-            size_t mlen;
-            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
-                const BYTE* const match = base + matchIndex3;
-                mlen = ZSTD_count(ip, match, iLimit);
-            } else {
-                const BYTE* const match = dictBase + matchIndex3;
-                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
-            }
-
-            /* save best solution */
-            if (mlen >= mls /* == 3 > bestLength */) {
-                DEBUGLOG(8, "found small match with hlog3, of length %u",
-                            (U32)mlen);
-                bestLength = mlen;
-                assert(curr > matchIndex3);
-                assert(mnum==0);  /* no prior solution */
-                matches[0].off = STORE_OFFSET(curr - matchIndex3);
-                matches[0].len = (U32)mlen;
-                mnum = 1;
-                if ( (mlen > sufficient_len) |
-                     (ip+mlen == iLimit) ) {  /* best possible length */
-                    ms->nextToUpdate = curr+1;  /* skip insertion */
-                    return 1;
-        }   }   }
-        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
-    }  /* if (mls == 3) */
-
-    hashTable[h] = curr;   /* Update Hash Table */
-
-    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
-        U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        const BYTE* match;
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        assert(curr > matchIndex);
-
-        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
-            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
-            match = base + matchIndex;
-            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
-            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
-        } else {
-            match = dictBase + matchIndex;
-            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
-            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
-            if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* prepare for match[matchLength] read */
-        }
-
-        if (matchLength > bestLength) {
-            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
-            assert(matchEndIdx > matchIndex);
-            if (matchLength > matchEndIdx - matchIndex)
-                matchEndIdx = matchIndex + (U32)matchLength;
-            bestLength = matchLength;
-            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
-            matches[mnum].len = (U32)matchLength;
-            mnum++;
-            if ( (matchLength > ZSTD_OPT_NUM)
-               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
-                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
-                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
-        }   }
-
-        if (match[matchLength] < ip[matchLength]) {
-            /* match smaller than current */
-            *smallerPtr = matchIndex;             /* update smaller idx */
-            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
-            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
-        } else {
-            *largerPtr = matchIndex;
-            commonLengthLarger = matchLength;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-    }   }
-
-    *smallerPtr = *largerPtr = 0;
-
-    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
-    if (dictMode == ZSTD_dictMatchState && nbCompares) {
-        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
-        U32 dictMatchIndex = dms->hashTable[dmsH];
-        const U32* const dmsBt = dms->chainTable;
-        commonLengthSmaller = commonLengthLarger = 0;
-        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
-            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
-            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-            const BYTE* match = dmsBase + dictMatchIndex;
-            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
-            if (dictMatchIndex+matchLength >= dmsHighLimit)
-                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
-
-            if (matchLength > bestLength) {
-                matchIndex = dictMatchIndex + dmsIndexDelta;
-                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
-                if (matchLength > matchEndIdx - matchIndex)
-                    matchEndIdx = matchIndex + (U32)matchLength;
-                bestLength = matchLength;
-                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
-                matches[mnum].len = (U32)matchLength;
-                mnum++;
-                if ( (matchLength > ZSTD_OPT_NUM)
-                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
-                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-            }   }
-
-            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
-            if (match[matchLength] < ip[matchLength]) {
-                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
-                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
-            } else {
-                /* match is larger than current */
-                commonLengthLarger = matchLength;
-                dictMatchIndex = nextPtr[0];
-    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
-
-    assert(matchEndIdx > curr+8);
-    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
-    return mnum;
-}
-
-typedef U32 (*ZSTD_getAllMatchesFn)(
-    ZSTD_match_t*,
-    ZSTD_matchState_t*,
-    U32*,
-    const BYTE*,
-    const BYTE*,
-    const U32 rep[ZSTD_REP_NUM],
-    U32 const ll0,
-    U32 const lengthToBeat);
-
-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
-        ZSTD_match_t* matches,
-        ZSTD_matchState_t* ms,
-        U32* nextToUpdate3,
-        const BYTE* ip,
-        const BYTE* const iHighLimit,
-        const U32 rep[ZSTD_REP_NUM],
-        U32 const ll0,
-        U32 const lengthToBeat,
-        const ZSTD_dictMode_e dictMode,
-        const U32 mls)
-{
-    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
-    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
-    if (ip < ms->window.base + ms->nextToUpdate)
-        return 0;   /* skipped area */
-    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
-    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
-}
-
-#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
-
-#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
-    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
-            ZSTD_match_t* matches,                             \
-            ZSTD_matchState_t* ms,                             \
-            U32* nextToUpdate3,                                \
-            const BYTE* ip,                                    \
-            const BYTE* const iHighLimit,                      \
-            const U32 rep[ZSTD_REP_NUM],                       \
-            U32 const ll0,                                     \
-            U32 const lengthToBeat)                            \
-    {                                                          \
-        return ZSTD_btGetAllMatches_internal(                  \
-                matches, ms, nextToUpdate3, ip, iHighLimit,    \
-                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
-    }
-
-#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
-    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
-    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
-    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
-    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
-
-GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
-GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
-GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
-
-#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
-    {                                            \
-        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
-        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
-        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
-        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
-    }
-
-static ZSTD_getAllMatchesFn
-ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
-{
-    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
-        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
-        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
-        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
-    };
-    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
-    assert((U32)dictMode < 3);
-    assert(mls - 3 < 4);
-    return getAllMatchesFns[(int)dictMode][mls - 3];
-}
-
-/*************************
-*  LDM helper functions  *
-*************************/
-
-/* Struct containing info needed to make decision about ldm inclusion */
-typedef struct {
-    rawSeqStore_t seqStore;   /* External match candidates store for this block */
-    U32 startPosInBlock;      /* Start position of the current match candidate */
-    U32 endPosInBlock;        /* End position of the current match candidate */
-    U32 offset;               /* Offset of the match candidate */
-} ZSTD_optLdm_t;
-
-/* ZSTD_optLdm_skipRawSeqStoreBytes():
- * Moves forward in @rawSeqStore by @nbBytes,
- * which will update the fields 'pos' and 'posInSequence'.
- */
-static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
-{
-    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
-    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
-        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
-        if (currPos >= currSeq.litLength + currSeq.matchLength) {
-            currPos -= currSeq.litLength + currSeq.matchLength;
-            rawSeqStore->pos++;
-        } else {
-            rawSeqStore->posInSequence = currPos;
-            break;
-        }
-    }
-    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
-        rawSeqStore->posInSequence = 0;
-    }
-}
-
-/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
- * Calculates the beginning and end of the next match in the current block.
- * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
- */
-static void
-ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
-                                       U32 blockBytesRemaining)
-{
-    rawSeq currSeq;
-    U32 currBlockEndPos;
-    U32 literalsBytesRemaining;
-    U32 matchBytesRemaining;
-
-    /* Setting match end position to MAX to ensure we never use an LDM during this block */
-    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
-        optLdm->startPosInBlock = UINT_MAX;
-        optLdm->endPosInBlock = UINT_MAX;
-        return;
-    }
-    /* Calculate appropriate bytes left in matchLength and litLength
-     * after adjusting based on ldmSeqStore->posInSequence */
-    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
-    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
-    currBlockEndPos = currPosInBlock + blockBytesRemaining;
-    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
-            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
-            0;
-    matchBytesRemaining = (literalsBytesRemaining == 0) ?
-            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
-            currSeq.matchLength;
-
-    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
-    if (literalsBytesRemaining >= blockBytesRemaining) {
-        optLdm->startPosInBlock = UINT_MAX;
-        optLdm->endPosInBlock = UINT_MAX;
-        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
-        return;
-    }
-
-    /* Matches may be < MINMATCH by this process. In that case, we will reject them
-       when we are deciding whether or not to add the ldm */
-    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
-    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
-    optLdm->offset = currSeq.offset;
-
-    if (optLdm->endPosInBlock > currBlockEndPos) {
-        /* Match ends after the block ends, we can't use the whole match */
-        optLdm->endPosInBlock = currBlockEndPos;
-        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
-    } else {
-        /* Consume nb of bytes equal to size of sequence left */
-        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
-    }
-}
-
-/* ZSTD_optLdm_maybeAddMatch():
- * Adds a match if it's long enough,
- * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
- * into 'matches'. Maintains the correct ordering of 'matches'.
- */
-static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
-{
-    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
-    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-
-    /* Ensure that current block position is not outside of the match */
-    if (currPosInBlock < optLdm->startPosInBlock
-      || currPosInBlock >= optLdm->endPosInBlock
-      || candidateMatchLength < MINMATCH) {
-        return;
-    }
-
-    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
-        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
-        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
-                 candidateOffCode, candidateMatchLength, currPosInBlock);
-        matches[*nbMatches].len = candidateMatchLength;
-        matches[*nbMatches].off = candidateOffCode;
-        (*nbMatches)++;
-    }
-}
-
-/* ZSTD_optLdm_processMatchCandidate():
- * Wrapper function to update ldm seq store and call ldm functions as necessary.
- */
-static void
-ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
-                                  ZSTD_match_t* matches, U32* nbMatches,
-                                  U32 currPosInBlock, U32 remainingBytes)
-{
-    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
-        return;
-    }
-
-    if (currPosInBlock >= optLdm->endPosInBlock) {
-        if (currPosInBlock > optLdm->endPosInBlock) {
-            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
-             * at the end of a match from the ldm seq store, and will often be some bytes
-             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
-             */
-            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
-            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-        }
-        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
-    }
-    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
-}
-
-
-/*-*******************************
-*  Optimal parser
-*********************************/
-
-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
-{
-    return sol.litlen + sol.mlen;
-}
-
-#if 0 /* debug */
-
-static void
-listStats(const U32* table, int lastEltID)
-{
-    int const nbElts = lastEltID + 1;
-    int enb;
-    for (enb=0; enb < nbElts; enb++) {
-        (void)table;
-        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
-        RAWLOG(2, "%4i,", table[enb]);
-    }
-    RAWLOG(2, " \n");
-}
-
-#endif
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
-                               seqStore_t* seqStore,
-                               U32 rep[ZSTD_REP_NUM],
-                         const void* src, size_t srcSize,
-                         const int optLevel,
-                         const ZSTD_dictMode_e dictMode)
-{
-    optState_t* const optStatePtr = &ms->opt;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
-    const BYTE* const base = ms->window.base;
-    const BYTE* const prefixStart = base + ms->window.dictLimit;
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-
-    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
-
-    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
-    U32 nextToUpdate3 = ms->nextToUpdate;
-
-    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
-    ZSTD_match_t* const matches = optStatePtr->matchTable;
-    ZSTD_optimal_t lastSequence;
-    ZSTD_optLdm_t optLdm;
-
-    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
-    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
-    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
-
-    /* init */
-    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
-                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
-    assert(optLevel <= 2);
-    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
-    ip += (ip==prefixStart);
-
-    /* Match Loop */
-    while (ip < ilimit) {
-        U32 cur, last_pos = 0;
-
-        /* find first match */
-        {   U32 const litlen = (U32)(ip - anchor);
-            U32 const ll0 = !litlen;
-            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
-            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                              (U32)(ip-istart), (U32)(iend - ip));
-            if (!nbMatches) { ip++; continue; }
-
-            /* initialize opt[0] */
-            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
-            opt[0].mlen = 0;  /* means is_a_literal */
-            opt[0].litlen = litlen;
-            /* We don't need to include the actual price of the literals because
-             * it is static for the duration of the forward pass, and is included
-             * in every price. We include the literal length to avoid negative
-             * prices when we subtract the previous literal length.
-             */
-            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
-
-            /* large match -> immediate encoding */
-            {   U32 const maxML = matches[nbMatches-1].len;
-                U32 const maxOffcode = matches[nbMatches-1].off;
-                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
-                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
-
-                if (maxML > sufficient_len) {
-                    lastSequence.litlen = litlen;
-                    lastSequence.mlen = maxML;
-                    lastSequence.off = maxOffcode;
-                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
-                                maxML, sufficient_len);
-                    cur = 0;
-                    last_pos = ZSTD_totalLen(lastSequence);
-                    goto _shortestPath;
-            }   }
-
-            /* set prices for first matches starting position == 0 */
-            assert(opt[0].price >= 0);
-            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 pos;
-                U32 matchNb;
-                for (pos = 1; pos < minMatch; pos++) {
-                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
-                }
-                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                    U32 const offcode = matches[matchNb].off;
-                    U32 const end = matches[matchNb].len;
-                    for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
-                        U32 const sequencePrice = literalsPrice + matchPrice;
-                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
-                                    pos, ZSTD_fCost(sequencePrice));
-                        opt[pos].mlen = pos;
-                        opt[pos].off = offcode;
-                        opt[pos].litlen = litlen;
-                        opt[pos].price = (int)sequencePrice;
-                }   }
-                last_pos = pos-1;
-            }
-        }
-
-        /* check further positions */
-        for (cur = 1; cur <= last_pos; cur++) {
-            const BYTE* const inr = ip + cur;
-            assert(cur < ZSTD_OPT_NUM);
-            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
-
-            /* Fix current position with one literal if cheaper */
-            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
-                int const price = opt[cur-1].price
-                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
-                assert(price < 1000000000); /* overflow check */
-                if (price <= opt[cur].price) {
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
-                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
-                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    opt[cur].mlen = 0;
-                    opt[cur].off = 0;
-                    opt[cur].litlen = litlen;
-                    opt[cur].price = price;
-                } else {
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
-                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
-                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
-                }
-            }
-
-            /* Set the repcodes of the current position. We must do it here
-             * because we rely on the repcodes of the 2nd to last sequence being
-             * correct to set the next chunks repcodes during the backward
-             * traversal.
-             */
-            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
-            assert(cur >= opt[cur].mlen);
-            if (opt[cur].mlen != 0) {
-                U32 const prev = cur - opt[cur].mlen;
-                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
-                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
-            } else {
-                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
-            }
-
-            /* last match must start at a minimum distance of 8 from oend */
-            if (inr > ilimit) continue;
-
-            if (cur == last_pos) break;
-
-            if ( (optLevel==0) /*static_test*/
-              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
-                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
-                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
-            }
-
-            assert(opt[cur].price >= 0);
-            {   U32 const ll0 = (opt[cur].mlen != 0);
-                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
-                U32 const previousPrice = (U32)opt[cur].price;
-                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
-                U32 matchNb;
-
-                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                                  (U32)(inr-istart), (U32)(iend-inr));
-
-                if (!nbMatches) {
-                    DEBUGLOG(7, "rPos:%u : no match found", cur);
-                    continue;
-                }
-
-                {   U32 const maxML = matches[nbMatches-1].len;
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
-                                inr-istart, cur, nbMatches, maxML);
-
-                    if ( (maxML > sufficient_len)
-                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
-                        lastSequence.mlen = maxML;
-                        lastSequence.off = matches[nbMatches-1].off;
-                        lastSequence.litlen = litlen;
-                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
-                        last_pos = cur + ZSTD_totalLen(lastSequence);
-                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
-                        goto _shortestPath;
-                }   }
-
-                /* set prices using matches found at position == cur */
-                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                    U32 const offset = matches[matchNb].off;
-                    U32 const lastML = matches[matchNb].len;
-                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
-                    U32 mlen;
-
-                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
-                                matchNb, matches[matchNb].off, lastML, litlen);
-
-                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
-                        U32 const pos = cur + mlen;
-                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
-
-                        if ((pos > last_pos) || (price < opt[pos].price)) {
-                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
-                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
-                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
-                            opt[pos].mlen = mlen;
-                            opt[pos].off = offset;
-                            opt[pos].litlen = litlen;
-                            opt[pos].price = price;
-                        } else {
-                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
-                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
-                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
-                        }
-            }   }   }
-        }  /* for (cur = 1; cur <= last_pos; cur++) */
-
-        lastSequence = opt[last_pos];
-        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
-        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
-
-_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
-        assert(opt[0].mlen == 0);
-
-        /* Set the next chunk's repcodes based on the repcodes of the beginning
-         * of the last match, and the last sequence. This avoids us having to
-         * update them while traversing the sequences.
-         */
-        if (lastSequence.mlen != 0) {
-            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
-            ZSTD_memcpy(rep, &reps, sizeof(reps));
-        } else {
-            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
-        }
-
-        {   U32 const storeEnd = cur + 1;
-            U32 storeStart = storeEnd;
-            U32 seqPos = cur;
-
-            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
-                        last_pos, cur); (void)last_pos;
-            assert(storeEnd < ZSTD_OPT_NUM);
-            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
-            opt[storeEnd] = lastSequence;
-            while (seqPos > 0) {
-                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
-                storeStart--;
-                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
-                opt[storeStart] = opt[seqPos];
-                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
-            }
-
-            /* save sequences */
-            DEBUGLOG(6, "sending selected sequences into seqStore")
-            {   U32 storePos;
-                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
-                    U32 const llen = opt[storePos].litlen;
-                    U32 const mlen = opt[storePos].mlen;
-                    U32 const offCode = opt[storePos].off;
-                    U32 const advance = llen + mlen;
-                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
-                                anchor - istart, (unsigned)llen, (unsigned)mlen);
-
-                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
-                        assert(storePos == storeEnd);   /* must be last sequence */
-                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
-                        continue;   /* will finish */
-                    }
-
-                    assert(anchor + llen <= iend);
-                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
-                    anchor += advance;
-                    ip = anchor;
-            }   }
-            ZSTD_setBasePrices(optStatePtr, optLevel);
-        }
-    }   /* while (ip < ilimit) */
-
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
-}
-
-static size_t ZSTD_compressBlock_opt0(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
-{
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
-}
-
-static size_t ZSTD_compressBlock_opt2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
-{
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
-}
-
-size_t ZSTD_compressBlock_btopt(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
-}
-
-
-
-
-/* ZSTD_initStats_ultra():
- * make a first compression pass, just to seed stats with more accurate starting values.
- * only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its contract must be respected.
- */
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
-                     seqStore_t* seqStore,
-                     U32 rep[ZSTD_REP_NUM],
-               const void* src, size_t srcSize)
-{
-    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
-    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
-
-    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
-    assert(ms->opt.litLengthSum == 0);    /* first block */
-    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
-    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
-    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
-
-    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
-
-    /* invalidate first scan from history */
-    ZSTD_resetSeqStore(seqStore);
-    ms->window.base -= srcSize;
-    ms->window.dictLimit += (U32)srcSize;
-    ms->window.lowLimit = ms->window.dictLimit;
-    ms->nextToUpdate = ms->window.dictLimit;
-
-}
-
-size_t ZSTD_compressBlock_btultra(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_btultra2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
-    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
-
-    /* 2-pass strategy:
-     * this strategy makes a first pass over first block to collect statistics
-     * and seed next round's statistics with it.
-     * After 1st pass, function forgets everything, and starts a new block.
-     * Consequently, this can only work if no data has been previously loaded in tables,
-     * aka, no dictionary, no prefix, no ldm preprocessing.
-     * The compression ratio gain is generally small (~0.5% on first block),
-     * the cost is 2x cpu time on first block. */
-    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-    if ( (ms->opt.litLengthSum==0)   /* first block */
-      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
-      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
-      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
-      && (srcSize > ZSTD_PREDEF_THRESHOLD)
-      ) {
-        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
-    }
-
-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
-}
-
-size_t ZSTD_compressBlock_btopt_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_btultra_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
-}
-
-size_t ZSTD_compressBlock_btopt_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
-}
-
-size_t ZSTD_compressBlock_btultra_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
-}
-
-/* note : no btultra2 variant for extDict nor dictMatchState,
- * because btultra2 is not meant to work with dictionaries
- * and is only specific for the first block (no prefix) */
diff --git a/dep/zstd/lib/compress/zstd_opt.h b/dep/zstd/lib/compress/zstd_opt.h
deleted file mode 100644
index 627255f53..000000000
--- a/dep/zstd/lib/compress/zstd_opt.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_OPT_H
-#define ZSTD_OPT_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include "zstd_compress_internal.h"
-
-/* used in ZSTD_loadDictionaryContent() */
-void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
-
-size_t ZSTD_compressBlock_btopt(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra2(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-
-size_t ZSTD_compressBlock_btopt_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btopt_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
-
-        /* note : no btultra2 variant for extDict nor dictMatchState,
-         * because btultra2 is not meant to work with dictionaries
-         * and is only specific for the first block (no prefix) */
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_OPT_H */
diff --git a/dep/zstd/lib/compress/zstdmt_compress.c b/dep/zstd/lib/compress/zstdmt_compress.c
deleted file mode 100644
index 6bc14b035..000000000
--- a/dep/zstd/lib/compress/zstdmt_compress.c
+++ /dev/null
@@ -1,1859 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/* ======   Compiler specifics   ====== */
-#if defined(_MSC_VER)
-#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
-#endif
-
-
-/* ======   Constants   ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
-/* ======   Dependencies   ====== */
-#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
-#include "../common/mem.h"         /* MEM_STATIC */
-#include "../common/pool.h"        /* threadpool */
-#include "../common/threading.h"   /* mutex */
-#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
-#include "zstd_ldm.h"
-#include "zstdmt_compress.h"
-
-/* Guards code to support resizing the SeqPool.
- * We will want to resize the SeqPool to save memory in the future.
- * Until then, comment the code out since it is unused.
- */
-#define ZSTD_RESIZE_SEQPOOL 0
-
-/* ======   Debug   ====== */
-#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
-    && !defined(_MSC_VER) \
-    && !defined(__MINGW32__)
-
-#  include <stdio.h>
-#  include <unistd.h>
-#  include <sys/times.h>
-
-#  define DEBUG_PRINTHEX(l,p,n) {            \
-    unsigned debug_u;                        \
-    for (debug_u=0; debug_u<(n); debug_u++)  \
-        RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    RAWLOG(l, " \n");                        \
-}
-
-static unsigned long long GetCurrentClockTimeMicroseconds(void)
-{
-   static clock_t _ticksPerSecond = 0;
-   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
-
-   {   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
-       return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
-}  }
-
-#define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
-        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-        ZSTD_pthread_mutex_lock(mutex);           \
-        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-            unsigned long long const elapsedTime = (afterTime-beforeTime); \
-            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
-                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-                   elapsedTime, #mutex);          \
-        }   }                                     \
-    } else {                                      \
-        ZSTD_pthread_mutex_lock(mutex);           \
-    }                                             \
-}
-
-#else
-
-#  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-#  define DEBUG_PRINTHEX(l,p,n) {}
-
-#endif
-
-
-/* =====   Buffer Pool   ===== */
-/* a single Buffer Pool can be invoked from multiple threads in parallel */
-
-typedef struct buffer_s {
-    void* start;
-    size_t capacity;
-} buffer_t;
-
-static const buffer_t g_nullBuffer = { NULL, 0 };
-
-typedef struct ZSTDMT_bufferPool_s {
-    ZSTD_pthread_mutex_t poolMutex;
-    size_t bufferSize;
-    unsigned totalBuffers;
-    unsigned nbBuffers;
-    ZSTD_customMem cMem;
-    buffer_t bTable[1];   /* variable size */
-} ZSTDMT_bufferPool;
-
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
-{
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
-        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
-    if (bufPool==NULL) return NULL;
-    if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
-        ZSTD_customFree(bufPool, cMem);
-        return NULL;
-    }
-    bufPool->bufferSize = 64 KB;
-    bufPool->totalBuffers = maxNbBuffers;
-    bufPool->nbBuffers = 0;
-    bufPool->cMem = cMem;
-    return bufPool;
-}
-
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    unsigned u;
-    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-    if (!bufPool) return;   /* compatibility with free on NULL */
-    for (u=0; u<bufPool->totalBuffers; u++) {
-        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
-/* only works at initialization, not during compression */
-static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    size_t const poolSize = sizeof(*bufPool)
-                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
-    unsigned u;
-    size_t totalBufferSize = 0;
-    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
-    for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].capacity;
-    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-
-    return poolSize + totalBufferSize;
-}
-
-/* ZSTDMT_setBufferSize() :
- * all future buffers provided by this buffer pool will have _at least_ this size
- * note : it's better for all buffers to have same size,
- * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
-static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
-{
-    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
-    DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize);
-    bufPool->bufferSize = bSize;
-    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-}
-
-
-static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
-{
-    if (srcBufPool==NULL) return NULL;
-    if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
-        return srcBufPool;
-    /* need a larger buffer pool */
-    {   ZSTD_customMem const cMem = srcBufPool->cMem;
-        size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
-        ZSTDMT_bufferPool* newBufPool;
-        ZSTDMT_freeBufferPool(srcBufPool);
-        newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
-        if (newBufPool==NULL) return newBufPool;
-        ZSTDMT_setBufferSize(newBufPool, bSize);
-        return newBufPool;
-    }
-}
-
-/** ZSTDMT_getBuffer() :
- *  assumption : bufPool must be valid
- * @return : a buffer, with start pointer and size
- *  note: allocation may fail, in this case, start==NULL and size==0 */
-static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
-{
-    size_t const bSize = bufPool->bufferSize;
-    DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
-    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
-    if (bufPool->nbBuffers) {   /* try to use an existing buffer */
-        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
-        size_t const availBufferSize = buf.capacity;
-        bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
-        if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
-            /* large enough, but not too much */
-            DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
-                        bufPool->nbBuffers, (U32)buf.capacity);
-            ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-            return buf;
-        }
-        /* size conditions not respected : scratch this buffer, create new one */
-        DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
-        ZSTD_customFree(buf.start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* create new buffer */
-    DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
-    {   buffer_t buffer;
-        void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
-        buffer.start = start;   /* note : start can be NULL if malloc fails ! */
-        buffer.capacity = (start==NULL) ? 0 : bSize;
-        if (start==NULL) {
-            DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
-        } else {
-            DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
-        }
-        return buffer;
-    }
-}
-
-#if ZSTD_RESIZE_SEQPOOL
-/** ZSTDMT_resizeBuffer() :
- * assumption : bufPool must be valid
- * @return : a buffer that is at least the buffer pool buffer size.
- *           If a reallocation happens, the data in the input buffer is copied.
- */
-static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
-{
-    size_t const bSize = bufPool->bufferSize;
-    if (buffer.capacity < bSize) {
-        void* const start = ZSTD_customMalloc(bSize, bufPool->cMem);
-        buffer_t newBuffer;
-        newBuffer.start = start;
-        newBuffer.capacity = start == NULL ? 0 : bSize;
-        if (start != NULL) {
-            assert(newBuffer.capacity >= buffer.capacity);
-            ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
-            DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
-            return newBuffer;
-        }
-        DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
-    }
-    return buffer;
-}
-#endif
-
-/* store buffer for later re-use, up to pool capacity */
-static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
-{
-    DEBUGLOG(5, "ZSTDMT_releaseBuffer");
-    if (buf.start == NULL) return;   /* compatible with release on NULL */
-    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
-    if (bufPool->nbBuffers < bufPool->totalBuffers) {
-        bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
-        DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
-                    (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
-        ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-        return;
-    }
-    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* Reached bufferPool capacity (should not happen) */
-    DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
-    ZSTD_customFree(buf.start, bufPool->cMem);
-}
-
-/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
- * The 3 additional buffers are as follows:
- *   1 buffer for input loading
- *   1 buffer for "next input" when submitting current one
- *   1 buffer stuck in queue */
-#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
-
-/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
- * So we only need one seq buffer per worker. */
-#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
-
-/* =====   Seq Pool Wrapper   ====== */
-
-typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
-
-static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
-{
-    return ZSTDMT_sizeof_bufferPool(seqPool);
-}
-
-static rawSeqStore_t bufferToSeq(buffer_t buffer)
-{
-    rawSeqStore_t seq = kNullRawSeqStore;
-    seq.seq = (rawSeq*)buffer.start;
-    seq.capacity = buffer.capacity / sizeof(rawSeq);
-    return seq;
-}
-
-static buffer_t seqToBuffer(rawSeqStore_t seq)
-{
-    buffer_t buffer;
-    buffer.start = seq.seq;
-    buffer.capacity = seq.capacity * sizeof(rawSeq);
-    return buffer;
-}
-
-static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
-{
-    if (seqPool->bufferSize == 0) {
-        return kNullRawSeqStore;
-    }
-    return bufferToSeq(ZSTDMT_getBuffer(seqPool));
-}
-
-#if ZSTD_RESIZE_SEQPOOL
-static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
-{
-  return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
-}
-#endif
-
-static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
-{
-  ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
-}
-
-static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
-{
-  ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
-}
-
-static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
-{
-    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
-    if (seqPool == NULL) return NULL;
-    ZSTDMT_setNbSeq(seqPool, 0);
-    return seqPool;
-}
-
-static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
-{
-    ZSTDMT_freeBufferPool(seqPool);
-}
-
-static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
-{
-    return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
-}
-
-
-/* =====   CCtx Pool   ===== */
-/* a single CCtx Pool can be invoked from multiple threads in parallel */
-
-typedef struct {
-    ZSTD_pthread_mutex_t poolMutex;
-    int totalCCtx;
-    int availCCtx;
-    ZSTD_customMem cMem;
-    ZSTD_CCtx* cctx[1];   /* variable size */
-} ZSTDMT_CCtxPool;
-
-/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
-static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
-{
-    int cid;
-    for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctx[cid]);  /* note : compatible with free on NULL */
-    ZSTD_pthread_mutex_destroy(&pool->poolMutex);
-    ZSTD_customFree(pool, pool->cMem);
-}
-
-/* ZSTDMT_createCCtxPool() :
- * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
-static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
-                                              ZSTD_customMem cMem)
-{
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
-        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
-    assert(nbWorkers > 0);
-    if (!cctxPool) return NULL;
-    if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
-        ZSTD_customFree(cctxPool, cMem);
-        return NULL;
-    }
-    cctxPool->cMem = cMem;
-    cctxPool->totalCCtx = nbWorkers;
-    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
-    DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
-    return cctxPool;
-}
-
-static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
-                                              int nbWorkers)
-{
-    if (srcPool==NULL) return NULL;
-    if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
-    /* need a larger cctx pool */
-    {   ZSTD_customMem const cMem = srcPool->cMem;
-        ZSTDMT_freeCCtxPool(srcPool);
-        return ZSTDMT_createCCtxPool(nbWorkers, cMem);
-    }
-}
-
-/* only works during initialization phase, not during compression */
-static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
-{
-    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
-    {   unsigned const nbWorkers = cctxPool->totalCCtx;
-        size_t const poolSize = sizeof(*cctxPool)
-                                + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
-        unsigned u;
-        size_t totalCCtxSize = 0;
-        for (u=0; u<nbWorkers; u++) {
-            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
-        }
-        ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
-        assert(nbWorkers > 0);
-        return poolSize + totalCCtxSize;
-    }
-}
-
-static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
-{
-    DEBUGLOG(5, "ZSTDMT_getCCtx");
-    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
-    if (cctxPool->availCCtx) {
-        cctxPool->availCCtx--;
-        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
-            ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
-            return cctx;
-    }   }
-    ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
-    DEBUGLOG(5, "create one more CCtx");
-    return ZSTD_createCCtx_advanced(cctxPool->cMem);   /* note : can be NULL, when creation fails ! */
-}
-
-static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
-{
-    if (cctx==NULL) return;   /* compatibility with release on NULL */
-    ZSTD_pthread_mutex_lock(&pool->poolMutex);
-    if (pool->availCCtx < pool->totalCCtx)
-        pool->cctx[pool->availCCtx++] = cctx;
-    else {
-        /* pool overflow : should not happen, since totalCCtx==nbWorkers */
-        DEBUGLOG(4, "CCtx pool overflow : free cctx");
-        ZSTD_freeCCtx(cctx);
-    }
-    ZSTD_pthread_mutex_unlock(&pool->poolMutex);
-}
-
-/* ====   Serial State   ==== */
-
-typedef struct {
-    void const* start;
-    size_t size;
-} range_t;
-
-typedef struct {
-    /* All variables in the struct are protected by mutex. */
-    ZSTD_pthread_mutex_t mutex;
-    ZSTD_pthread_cond_t cond;
-    ZSTD_CCtx_params params;
-    ldmState_t ldmState;
-    XXH64_state_t xxhState;
-    unsigned nextJobID;
-    /* Protects ldmWindow.
-     * Must be acquired after the main mutex when acquiring both.
-     */
-    ZSTD_pthread_mutex_t ldmWindowMutex;
-    ZSTD_pthread_cond_t ldmWindowCond;  /* Signaled when ldmWindow is updated */
-    ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
-} serialState_t;
-
-static int
-ZSTDMT_serialState_reset(serialState_t* serialState,
-                         ZSTDMT_seqPool* seqPool,
-                         ZSTD_CCtx_params params,
-                         size_t jobSize,
-                         const void* dict, size_t const dictSize,
-                         ZSTD_dictContentType_e dictContentType)
-{
-    /* Adjust parameters */
-    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
-        DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
-        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
-        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
-        assert(params.ldmParams.hashRateLog < 32);
-    } else {
-        ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
-    }
-    serialState->nextJobID = 0;
-    if (params.fParams.checksumFlag)
-        XXH64_reset(&serialState->xxhState, 0);
-    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
-        ZSTD_customMem cMem = params.customMem;
-        unsigned const hashLog = params.ldmParams.hashLog;
-        size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
-        unsigned const bucketLog =
-            params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-        unsigned const prevBucketLog =
-            serialState->params.ldmParams.hashLog -
-            serialState->params.ldmParams.bucketSizeLog;
-        size_t const numBuckets = (size_t)1 << bucketLog;
-        /* Size the seq pool tables */
-        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
-        /* Reset the window */
-        ZSTD_window_init(&serialState->ldmState.window);
-        /* Resize tables and output space if necessary. */
-        if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
-            ZSTD_customFree(serialState->ldmState.hashTable, cMem);
-            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
-        }
-        if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
-            ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
-        }
-        if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
-            return 1;
-        /* Zero the tables */
-        ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
-        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
-
-        /* Update window state and fill hash table with dict */
-        serialState->ldmState.loadedDictEnd = 0;
-        if (dictSize > 0) {
-            if (dictContentType == ZSTD_dct_rawContent) {
-                BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
-                ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
-                serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
-            } else {
-                /* don't even load anything */
-            }
-        }
-
-        /* Initialize serialState's copy of ldmWindow. */
-        serialState->ldmWindow = serialState->ldmState.window;
-    }
-
-    serialState->params = params;
-    serialState->params.jobSize = (U32)jobSize;
-    return 0;
-}
-
-static int ZSTDMT_serialState_init(serialState_t* serialState)
-{
-    int initError = 0;
-    ZSTD_memset(serialState, 0, sizeof(*serialState));
-    initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
-    initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
-    initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
-    initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
-    return initError;
-}
-
-static void ZSTDMT_serialState_free(serialState_t* serialState)
-{
-    ZSTD_customMem cMem = serialState->params.customMem;
-    ZSTD_pthread_mutex_destroy(&serialState->mutex);
-    ZSTD_pthread_cond_destroy(&serialState->cond);
-    ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
-    ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
-    ZSTD_customFree(serialState->ldmState.hashTable, cMem);
-    ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-}
-
-static void ZSTDMT_serialState_update(serialState_t* serialState,
-                                      ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
-                                      range_t src, unsigned jobID)
-{
-    /* Wait for our turn */
-    ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
-    while (serialState->nextJobID < jobID) {
-        DEBUGLOG(5, "wait for serialState->cond");
-        ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
-    }
-    /* A future job may error and skip our job */
-    if (serialState->nextJobID == jobID) {
-        /* It is now our turn, do any processing necessary */
-        if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
-            size_t error;
-            assert(seqStore.seq != NULL && seqStore.pos == 0 &&
-                   seqStore.size == 0 && seqStore.capacity > 0);
-            assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
-            error = ZSTD_ldm_generateSequences(
-                &serialState->ldmState, &seqStore,
-                &serialState->params.ldmParams, src.start, src.size);
-            /* We provide a large enough buffer to never fail. */
-            assert(!ZSTD_isError(error)); (void)error;
-            /* Update ldmWindow to match the ldmState.window and signal the main
-             * thread if it is waiting for a buffer.
-             */
-            ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
-            serialState->ldmWindow = serialState->ldmState.window;
-            ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
-            ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
-        }
-        if (serialState->params.fParams.checksumFlag && src.size > 0)
-            XXH64_update(&serialState->xxhState, src.start, src.size);
-    }
-    /* Now it is the next jobs turn */
-    serialState->nextJobID++;
-    ZSTD_pthread_cond_broadcast(&serialState->cond);
-    ZSTD_pthread_mutex_unlock(&serialState->mutex);
-
-    if (seqStore.size > 0) {
-        size_t const err = ZSTD_referenceExternalSequences(
-            jobCCtx, seqStore.seq, seqStore.size);
-        assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
-        assert(!ZSTD_isError(err));
-        (void)err;
-    }
-}
-
-static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
-                                              unsigned jobID, size_t cSize)
-{
-    ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
-    if (serialState->nextJobID <= jobID) {
-        assert(ZSTD_isError(cSize)); (void)cSize;
-        DEBUGLOG(5, "Skipping past job %u because of error", jobID);
-        serialState->nextJobID = jobID + 1;
-        ZSTD_pthread_cond_broadcast(&serialState->cond);
-
-        ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
-        ZSTD_window_clear(&serialState->ldmWindow);
-        ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
-        ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
-    }
-    ZSTD_pthread_mutex_unlock(&serialState->mutex);
-
-}
-
-
-/* ------------------------------------------ */
-/* =====          Worker thread         ===== */
-/* ------------------------------------------ */
-
-static const range_t kNullRange = { NULL, 0 };
-
-typedef struct {
-    size_t   consumed;                   /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
-    size_t   cSize;                      /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
-    ZSTD_pthread_mutex_t job_mutex;      /* Thread-safe - used by mtctx and worker */
-    ZSTD_pthread_cond_t job_cond;        /* Thread-safe - used by mtctx and worker */
-    ZSTDMT_CCtxPool* cctxPool;           /* Thread-safe - used by mtctx and (all) workers */
-    ZSTDMT_bufferPool* bufPool;          /* Thread-safe - used by mtctx and (all) workers */
-    ZSTDMT_seqPool* seqPool;             /* Thread-safe - used by mtctx and (all) workers */
-    serialState_t* serial;               /* Thread-safe - used by mtctx and (all) workers */
-    buffer_t dstBuff;                    /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
-    range_t prefix;                      /* set by mtctx, then read by worker & mtctx => no barrier */
-    range_t src;                         /* set by mtctx, then read by worker & mtctx => no barrier */
-    unsigned jobID;                      /* set by mtctx, then read by worker => no barrier */
-    unsigned firstJob;                   /* set by mtctx, then read by worker => no barrier */
-    unsigned lastJob;                    /* set by mtctx, then read by worker => no barrier */
-    ZSTD_CCtx_params params;             /* set by mtctx, then read by worker => no barrier */
-    const ZSTD_CDict* cdict;             /* set by mtctx, then read by worker => no barrier */
-    unsigned long long fullFrameSize;    /* set by mtctx, then read by worker => no barrier */
-    size_t   dstFlushed;                 /* used only by mtctx */
-    unsigned frameChecksumNeeded;        /* used only by mtctx */
-} ZSTDMT_jobDescription;
-
-#define JOB_ERROR(e) {                          \
-    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
-    job->cSize = e;                             \
-    ZSTD_pthread_mutex_unlock(&job->job_mutex); \
-    goto _endJob;                               \
-}
-
-/* ZSTDMT_compressionJob() is a POOL_function type */
-static void ZSTDMT_compressionJob(void* jobDescription)
-{
-    ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
-    ZSTD_CCtx_params jobParams = job->params;   /* do not modify job->params ! copy it, modify the copy */
-    ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
-    rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
-    buffer_t dstBuff = job->dstBuff;
-    size_t lastCBlockSize = 0;
-
-    /* resources */
-    if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
-    if (dstBuff.start == NULL) {   /* streaming job : doesn't provide a dstBuffer */
-        dstBuff = ZSTDMT_getBuffer(job->bufPool);
-        if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
-        job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
-    }
-    if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
-        JOB_ERROR(ERROR(memory_allocation));
-
-    /* Don't compute the checksum for chunks, since we compute it externally,
-     * but write it in the header.
-     */
-    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
-    /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
-    /* Correct nbWorkers to 0. */
-    jobParams.nbWorkers = 0;
-
-
-    /* init */
-    if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
-        assert(job->firstJob);  /* only allowed for first job */
-        if (ZSTD_isError(initError)) JOB_ERROR(initError);
-    } else {  /* srcStart points at reloaded section */
-        U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
-        {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
-            if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
-        }
-        if (!job->firstJob) {
-            size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
-            if (ZSTD_isError(err)) JOB_ERROR(err);
-        }
-        {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
-                                        job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
-                                        ZSTD_dtlm_fast,
-                                        NULL, /*cdict*/
-                                        &jobParams, pledgedSrcSize);
-            if (ZSTD_isError(initError)) JOB_ERROR(initError);
-    }   }
-
-    /* Perform serial step as early as possible, but after CCtx initialization */
-    ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
-
-    if (!job->firstJob) {  /* flush and overwrite frame header when it's not first job */
-        size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
-        if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
-        DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
-        ZSTD_invalidateRepCodes(cctx);
-    }
-
-    /* compress */
-    {   size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
-        int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
-        const BYTE* ip = (const BYTE*) job->src.start;
-        BYTE* const ostart = (BYTE*)dstBuff.start;
-        BYTE* op = ostart;
-        BYTE* oend = op + dstBuff.capacity;
-        int chunkNb;
-        if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize);   /* check overflow */
-        DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
-        assert(job->cSize == 0);
-        for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
-            size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
-            if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
-            ip += chunkSize;
-            op += cSize; assert(op < oend);
-            /* stats */
-            ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
-            job->cSize += cSize;
-            job->consumed = chunkSize * chunkNb;
-            DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
-                        (U32)cSize, (U32)job->cSize);
-            ZSTD_pthread_cond_signal(&job->job_cond);   /* warns some more data is ready to be flushed */
-            ZSTD_pthread_mutex_unlock(&job->job_mutex);
-        }
-        /* last block */
-        assert(chunkSize > 0);
-        assert((chunkSize & (chunkSize - 1)) == 0);  /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
-        if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
-            size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
-            size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
-            size_t const cSize = (job->lastJob) ?
-                 ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
-                 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
-            if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
-            lastCBlockSize = cSize;
-    }   }
-    if (!job->firstJob) {
-        /* Double check that we don't have an ext-dict, because then our
-         * repcode invalidation doesn't work.
-         */
-        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
-    }
-    ZSTD_CCtx_trace(cctx, 0);
-
-_endJob:
-    ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
-    if (job->prefix.size > 0)
-        DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start);
-    DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start);
-    /* release resources */
-    ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
-    ZSTDMT_releaseCCtx(job->cctxPool, cctx);
-    /* report */
-    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
-    if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0);
-    job->cSize += lastCBlockSize;
-    job->consumed = job->src.size;  /* when job->consumed == job->src.size , compression job is presumed completed */
-    ZSTD_pthread_cond_signal(&job->job_cond);
-    ZSTD_pthread_mutex_unlock(&job->job_mutex);
-}
-
-
-/* ------------------------------------------ */
-/* =====   Multi-threaded compression   ===== */
-/* ------------------------------------------ */
-
-typedef struct {
-    range_t prefix;         /* read-only non-owned prefix buffer */
-    buffer_t buffer;
-    size_t filled;
-} inBuff_t;
-
-typedef struct {
-  BYTE* buffer;     /* The round input buffer. All jobs get references
-                     * to pieces of the buffer. ZSTDMT_tryGetInputRange()
-                     * handles handing out job input buffers, and makes
-                     * sure it doesn't overlap with any pieces still in use.
-                     */
-  size_t capacity;  /* The capacity of buffer. */
-  size_t pos;       /* The position of the current inBuff in the round
-                     * buffer. Updated past the end if the inBuff once
-                     * the inBuff is sent to the worker thread.
-                     * pos <= capacity.
-                     */
-} roundBuff_t;
-
-static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
-
-#define RSYNC_LENGTH 32
-/* Don't create chunks smaller than the zstd block size.
- * This stops us from regressing compression ratio too much,
- * and ensures our output fits in ZSTD_compressBound().
- *
- * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
- * ZSTD_COMPRESSBOUND() will need to be updated.
- */
-#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
-#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
-
-typedef struct {
-  U64 hash;
-  U64 hitMask;
-  U64 primePower;
-} rsyncState_t;
-
-struct ZSTDMT_CCtx_s {
-    POOL_ctx* factory;
-    ZSTDMT_jobDescription* jobs;
-    ZSTDMT_bufferPool* bufPool;
-    ZSTDMT_CCtxPool* cctxPool;
-    ZSTDMT_seqPool* seqPool;
-    ZSTD_CCtx_params params;
-    size_t targetSectionSize;
-    size_t targetPrefixSize;
-    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
-    inBuff_t inBuff;
-    roundBuff_t roundBuff;
-    serialState_t serial;
-    rsyncState_t rsync;
-    unsigned jobIDMask;
-    unsigned doneJobID;
-    unsigned nextJobID;
-    unsigned frameEnded;
-    unsigned allJobsCompleted;
-    unsigned long long frameContentSize;
-    unsigned long long consumed;
-    unsigned long long produced;
-    ZSTD_customMem cMem;
-    ZSTD_CDict* cdictLocal;
-    const ZSTD_CDict* cdict;
-    unsigned providedFactory: 1;
-};
-
-static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
-{
-    U32 jobNb;
-    if (jobTable == NULL) return;
-    for (jobNb=0; jobNb<nbJobs; jobNb++) {
-        ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
-        ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
-    }
-    ZSTD_customFree(jobTable, cMem);
-}
-
-/* ZSTDMT_allocJobsTable()
- * allocate and init a job table.
- * update *nbJobsPtr to next power of 2 value, as size of table */
-static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
-{
-    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
-    U32 const nbJobs = 1 << nbJobsLog2;
-    U32 jobNb;
-    ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
-                ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
-    int initError = 0;
-    if (jobTable==NULL) return NULL;
-    *nbJobsPtr = nbJobs;
-    for (jobNb=0; jobNb<nbJobs; jobNb++) {
-        initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
-        initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
-    }
-    if (initError != 0) {
-        ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
-        return NULL;
-    }
-    return jobTable;
-}
-
-static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
-    U32 nbJobs = nbWorkers + 2;
-    if (nbJobs > mtctx->jobIDMask+1) {  /* need more job capacity */
-        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
-        mtctx->jobIDMask = 0;
-        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
-        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
-        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
-        mtctx->jobIDMask = nbJobs - 1;
-    }
-    return 0;
-}
-
-
-/* ZSTDMT_CCtxParam_setNbWorkers():
- * Internal use only */
-static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
-{
-    return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
-}
-
-MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
-{
-    ZSTDMT_CCtx* mtctx;
-    U32 nbJobs = nbWorkers + 2;
-    int initError;
-    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
-
-    if (nbWorkers < 1) return NULL;
-    nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
-    if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
-        /* invalid custom allocator */
-        return NULL;
-
-    mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
-    if (!mtctx) return NULL;
-    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
-    mtctx->cMem = cMem;
-    mtctx->allJobsCompleted = 1;
-    if (pool != NULL) {
-      mtctx->factory = pool;
-      mtctx->providedFactory = 1;
-    }
-    else {
-      mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
-      mtctx->providedFactory = 0;
-    }
-    mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
-    assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
-    mtctx->jobIDMask = nbJobs - 1;
-    mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
-    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
-    mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
-    initError = ZSTDMT_serialState_init(&mtctx->serial);
-    mtctx->roundBuff = kNullRoundBuff;
-    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
-        ZSTDMT_freeCCtx(mtctx);
-        return NULL;
-    }
-    DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
-    return mtctx;
-}
-
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
-{
-#ifdef ZSTD_MULTITHREAD
-    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
-#else
-    (void)nbWorkers;
-    (void)cMem;
-    (void)pool;
-    return NULL;
-#endif
-}
-
-
-/* ZSTDMT_releaseAllJobResources() :
- * note : ensure all workers are killed first ! */
-static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
-{
-    unsigned jobID;
-    DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
-    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
-        /* Copy the mutex/cond out */
-        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
-        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
-
-        DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
-        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-
-        /* Clear the job description, but keep the mutex/cond */
-        ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
-        mtctx->jobs[jobID].job_mutex = mutex;
-        mtctx->jobs[jobID].job_cond = cond;
-    }
-    mtctx->inBuff.buffer = g_nullBuffer;
-    mtctx->inBuff.filled = 0;
-    mtctx->allJobsCompleted = 1;
-}
-
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
-{
-    DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
-    while (mtctx->doneJobID < mtctx->nextJobID) {
-        unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
-        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
-        while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
-            DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID);   /* we want to block when waiting for data to flush */
-            ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
-        }
-        ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
-        mtctx->doneJobID++;
-    }
-}
-
-size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
-{
-    if (mtctx==NULL) return 0;   /* compatible with free on NULL */
-    if (!mtctx->providedFactory)
-        POOL_free(mtctx->factory);   /* stop and free worker threads */
-    ZSTDMT_releaseAllJobResources(mtctx);  /* release job resources into pools first */
-    ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
-    ZSTDMT_freeBufferPool(mtctx->bufPool);
-    ZSTDMT_freeCCtxPool(mtctx->cctxPool);
-    ZSTDMT_freeSeqPool(mtctx->seqPool);
-    ZSTDMT_serialState_free(&mtctx->serial);
-    ZSTD_freeCDict(mtctx->cdictLocal);
-    if (mtctx->roundBuff.buffer)
-        ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
-    ZSTD_customFree(mtctx, mtctx->cMem);
-    return 0;
-}
-
-size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
-{
-    if (mtctx == NULL) return 0;   /* supports sizeof NULL */
-    return sizeof(*mtctx)
-            + POOL_sizeof(mtctx->factory)
-            + ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
-            + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
-            + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
-            + ZSTDMT_sizeof_seqPool(mtctx->seqPool)
-            + ZSTD_sizeof_CDict(mtctx->cdictLocal)
-            + mtctx->roundBuff.capacity;
-}
-
-
-/* ZSTDMT_resize() :
- * @return : error code if fails, 0 on success */
-static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
-{
-    if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
-    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
-    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
-    if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
-    mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
-    if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
-    mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
-    if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
-    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
-    return 0;
-}
-
-
-/*! ZSTDMT_updateCParams_whileCompressing() :
- *  Updates a selected set of compression parameters, remaining compatible with currently active frame.
- *  New parameters will be applied to next compression job. */
-void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
-{
-    U32 const saved_wlog = mtctx->params.cParams.windowLog;   /* Do not modify windowLog while compressing */
-    int const compressionLevel = cctxParams->compressionLevel;
-    DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
-                compressionLevel);
-    mtctx->params.compressionLevel = compressionLevel;
-    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-        cParams.windowLog = saved_wlog;
-        mtctx->params.cParams = cParams;
-    }
-}
-
-/* ZSTDMT_getFrameProgression():
- * tells how much data has been consumed (input) and produced (output) for current frame.
- * able to count progression inside worker threads.
- * Note : mutex will be acquired during statistics collection inside workers. */
-ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
-{
-    ZSTD_frameProgression fps;
-    DEBUGLOG(5, "ZSTDMT_getFrameProgression");
-    fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
-    fps.consumed = mtctx->consumed;
-    fps.produced = fps.flushed = mtctx->produced;
-    fps.currentJobID = mtctx->nextJobID;
-    fps.nbActiveWorkers = 0;
-    {   unsigned jobNb;
-        unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
-        DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
-        for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
-            unsigned const wJobID = jobNb & mtctx->jobIDMask;
-            ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
-            ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
-            {   size_t const cResult = jobPtr->cSize;
-                size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
-                size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
-                assert(flushed <= produced);
-                fps.ingested += jobPtr->src.size;
-                fps.consumed += jobPtr->consumed;
-                fps.produced += produced;
-                fps.flushed  += flushed;
-                fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size);
-            }
-            ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
-        }
-    }
-    return fps;
-}
-
-
-size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
-{
-    size_t toFlush;
-    unsigned const jobID = mtctx->doneJobID;
-    assert(jobID <= mtctx->nextJobID);
-    if (jobID == mtctx->nextJobID) return 0;   /* no active job => nothing to flush */
-
-    /* look into oldest non-fully-flushed job */
-    {   unsigned const wJobID = jobID & mtctx->jobIDMask;
-        ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID];
-        ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
-        {   size_t const cResult = jobPtr->cSize;
-            size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
-            size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
-            assert(flushed <= produced);
-            assert(jobPtr->consumed <= jobPtr->src.size);
-            toFlush = produced - flushed;
-            /* if toFlush==0, nothing is available to flush.
-             * However, jobID is expected to still be active:
-             * if jobID was already completed and fully flushed,
-             * ZSTDMT_flushProduced() should have already moved onto next job.
-             * Therefore, some input has not yet been consumed. */
-            if (toFlush==0) {
-                assert(jobPtr->consumed < jobPtr->src.size);
-            }
-        }
-        ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
-    }
-
-    return toFlush;
-}
-
-
-/* ------------------------------------------ */
-/* =====   Multi-threaded compression   ===== */
-/* ------------------------------------------ */
-
-static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
-{
-    unsigned jobLog;
-    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-        /* In Long Range Mode, the windowLog is typically oversized.
-         * In which case, it's preferable to determine the jobSize
-         * based on cycleLog instead. */
-        jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3);
-    } else {
-        jobLog = MAX(20, params->cParams.windowLog + 2);
-    }
-    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
-}
-
-static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
-{
-    switch(strat)
-    {
-        case ZSTD_btultra2:
-            return 9;
-        case ZSTD_btultra:
-        case ZSTD_btopt:
-            return 8;
-        case ZSTD_btlazy2:
-        case ZSTD_lazy2:
-            return 7;
-        case ZSTD_lazy:
-        case ZSTD_greedy:
-        case ZSTD_dfast:
-        case ZSTD_fast:
-        default:;
-    }
-    return 6;
-}
-
-static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
-{
-    assert(0 <= ovlog && ovlog <= 9);
-    if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
-    return ovlog;
-}
-
-static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
-{
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
-    assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
-        /* In Long Range Mode, the windowLog is typically oversized.
-         * In which case, it's preferable to determine the jobSize
-         * based on chainLog instead.
-         * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
-                - overlapRLog;
-    }
-    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
-    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
-    DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
-    return (ovLog==0) ? 0 : (size_t)1 << ovLog;
-}
-
-/* ====================================== */
-/* =======      Streaming API     ======= */
-/* ====================================== */
-
-size_t ZSTDMT_initCStream_internal(
-        ZSTDMT_CCtx* mtctx,
-        const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
-        const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
-        unsigned long long pledgedSrcSize)
-{
-    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
-                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
-
-    /* params supposed partially fully validated at this point */
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
-    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-
-    /* init */
-    if (params.nbWorkers != mtctx->params.nbWorkers)
-        FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
-
-    if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
-    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
-
-    DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
-
-    if (mtctx->allJobsCompleted == 0) {   /* previous compression not correctly finished */
-        ZSTDMT_waitForAllJobsCompleted(mtctx);
-        ZSTDMT_releaseAllJobResources(mtctx);
-        mtctx->allJobsCompleted = 1;
-    }
-
-    mtctx->params = params;
-    mtctx->frameContentSize = pledgedSrcSize;
-    if (dict) {
-        ZSTD_freeCDict(mtctx->cdictLocal);
-        mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
-                                                    ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
-                                                    params.cParams, mtctx->cMem);
-        mtctx->cdict = mtctx->cdictLocal;
-        if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
-    } else {
-        ZSTD_freeCDict(mtctx->cdictLocal);
-        mtctx->cdictLocal = NULL;
-        mtctx->cdict = cdict;
-    }
-
-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
-    DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
-    mtctx->targetSectionSize = params.jobSize;
-    if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
-    }
-    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
-
-    if (params.rsyncable) {
-        /* Aim for the targetsectionSize as the average job size. */
-        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
-        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
-        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
-         * expected job size is at least 4x larger. */
-        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
-        DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
-        mtctx->rsync.hash = 0;
-        mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
-        mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
-    }
-    if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;  /* job size must be >= overlap size */
-    DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
-    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
-    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
-    {
-        /* If ldm is enabled we need windowSize space. */
-        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
-        /* Two buffers of slack, plus extra space for the overlap
-         * This is the minimum slack that LDM works with. One extra because
-         * flush might waste up to targetSectionSize-1 bytes. Another extra
-         * for the overlap (if > 0), then one to fill which doesn't overlap
-         * with the LDM window.
-         */
-        size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0);
-        size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers;
-        /* Compute the total size, and always have enough slack */
-        size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1);
-        size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers;
-        size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
-        if (mtctx->roundBuff.capacity < capacity) {
-            if (mtctx->roundBuff.buffer)
-                ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem);
-            mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem);
-            if (mtctx->roundBuff.buffer == NULL) {
-                mtctx->roundBuff.capacity = 0;
-                return ERROR(memory_allocation);
-            }
-            mtctx->roundBuff.capacity = capacity;
-        }
-    }
-    DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10));
-    mtctx->roundBuff.pos = 0;
-    mtctx->inBuff.buffer = g_nullBuffer;
-    mtctx->inBuff.filled = 0;
-    mtctx->inBuff.prefix = kNullRange;
-    mtctx->doneJobID = 0;
-    mtctx->nextJobID = 0;
-    mtctx->frameEnded = 0;
-    mtctx->allJobsCompleted = 0;
-    mtctx->consumed = 0;
-    mtctx->produced = 0;
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
-                                 dict, dictSize, dictContentType))
-        return ERROR(memory_allocation);
-    return 0;
-}
-
-
-/* ZSTDMT_writeLastEmptyBlock()
- * Write a single empty block with an end-of-frame to finish a frame.
- * Job must be created from streaming variant.
- * This function is always successful if expected conditions are fulfilled.
- */
-static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
-{
-    assert(job->lastJob == 1);
-    assert(job->src.size == 0);   /* last job is empty -> will be simplified into a last empty block */
-    assert(job->firstJob == 0);   /* cannot be first job, as it also needs to create frame header */
-    assert(job->dstBuff.start == NULL);   /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
-    job->dstBuff = ZSTDMT_getBuffer(job->bufPool);
-    if (job->dstBuff.start == NULL) {
-      job->cSize = ERROR(memory_allocation);
-      return;
-    }
-    assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize);   /* no buffer should ever be that small */
-    job->src = kNullRange;
-    job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
-    assert(!ZSTD_isError(job->cSize));
-    assert(job->consumed == 0);
-}
-
-static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
-{
-    unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
-    int const endFrame = (endOp == ZSTD_e_end);
-
-    if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
-        assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
-        return 0;
-    }
-
-    if (!mtctx->jobReady) {
-        BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start;
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
-                    mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size);
-        mtctx->jobs[jobID].src.start = src;
-        mtctx->jobs[jobID].src.size = srcSize;
-        assert(mtctx->inBuff.filled >= srcSize);
-        mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix;
-        mtctx->jobs[jobID].consumed = 0;
-        mtctx->jobs[jobID].cSize = 0;
-        mtctx->jobs[jobID].params = mtctx->params;
-        mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL;
-        mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
-        mtctx->jobs[jobID].bufPool = mtctx->bufPool;
-        mtctx->jobs[jobID].seqPool = mtctx->seqPool;
-        mtctx->jobs[jobID].serial = &mtctx->serial;
-        mtctx->jobs[jobID].jobID = mtctx->nextJobID;
-        mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
-        mtctx->jobs[jobID].lastJob = endFrame;
-        mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0);
-        mtctx->jobs[jobID].dstFlushed = 0;
-
-        /* Update the round buffer pos and clear the input buffer to be reset */
-        mtctx->roundBuff.pos += srcSize;
-        mtctx->inBuff.buffer = g_nullBuffer;
-        mtctx->inBuff.filled = 0;
-        /* Set the prefix */
-        if (!endFrame) {
-            size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
-            mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
-            mtctx->inBuff.prefix.size = newPrefixSize;
-        } else {   /* endFrame==1 => no need for another input buffer */
-            mtctx->inBuff.prefix = kNullRange;
-            mtctx->frameEnded = endFrame;
-            if (mtctx->nextJobID == 0) {
-                /* single job exception : checksum is already calculated directly within worker thread */
-                mtctx->params.fParams.checksumFlag = 0;
-        }   }
-
-        if ( (srcSize == 0)
-          && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) {
-            DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
-            assert(endOp == ZSTD_e_end);  /* only possible case : need to end the frame with an empty last block */
-            ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
-            mtctx->nextJobID++;
-            return 0;
-        }
-    }
-
-    DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes  (end:%u, jobNb == %u (mod:%u))",
-                mtctx->nextJobID,
-                (U32)mtctx->jobs[jobID].src.size,
-                mtctx->jobs[jobID].lastJob,
-                mtctx->nextJobID,
-                jobID);
-    if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
-        mtctx->nextJobID++;
-        mtctx->jobReady = 0;
-    } else {
-        DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
-        mtctx->jobReady = 1;
-    }
-    return 0;
-}
-
-
-/*! ZSTDMT_flushProduced() :
- *  flush whatever data has been produced but not yet flushed in current job.
- *  move to next job if current one is fully flushed.
- * `output` : `pos` will be updated with amount of data flushed .
- * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
- * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
-static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
-{
-    unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
-    DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
-                blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
-    assert(output->size >= output->pos);
-
-    ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
-    if (  blockToFlush
-      && (mtctx->doneJobID < mtctx->nextJobID) ) {
-        assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
-        while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) {  /* nothing to flush */
-            if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) {
-                DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
-                            mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size);
-                break;
-            }
-            DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
-                        mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
-            ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex);  /* block when nothing to flush but some to come */
-    }   }
-
-    /* try to flush something */
-    {   size_t cSize = mtctx->jobs[wJobID].cSize;                  /* shared */
-        size_t const srcConsumed = mtctx->jobs[wJobID].consumed;   /* shared */
-        size_t const srcSize = mtctx->jobs[wJobID].src.size;       /* read-only, could be done after mutex lock, but no-declaration-after-statement */
-        ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
-        if (ZSTD_isError(cSize)) {
-            DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
-                        mtctx->doneJobID, ZSTD_getErrorName(cSize));
-            ZSTDMT_waitForAllJobsCompleted(mtctx);
-            ZSTDMT_releaseAllJobResources(mtctx);
-            return cSize;
-        }
-        /* add frame checksum if necessary (can only happen once) */
-        assert(srcConsumed <= srcSize);
-        if ( (srcConsumed == srcSize)   /* job completed -> worker no longer active */
-          && mtctx->jobs[wJobID].frameChecksumNeeded ) {
-            U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
-            DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
-            MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
-            cSize += 4;
-            mtctx->jobs[wJobID].cSize += 4;  /* can write this shared value, as worker is no longer active */
-            mtctx->jobs[wJobID].frameChecksumNeeded = 0;
-        }
-
-        if (cSize > 0) {   /* compression is ongoing or completed */
-            size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
-            DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
-                        (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
-            assert(mtctx->doneJobID < mtctx->nextJobID);
-            assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
-            assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
-            if (toFlush > 0) {
-                ZSTD_memcpy((char*)output->dst + output->pos,
-                    (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
-                    toFlush);
-            }
-            output->pos += toFlush;
-            mtctx->jobs[wJobID].dstFlushed += toFlush;  /* can write : this value is only used by mtctx */
-
-            if ( (srcConsumed == srcSize)    /* job is completed */
-              && (mtctx->jobs[wJobID].dstFlushed == cSize) ) {   /* output buffer fully flushed => free this job position */
-                DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
-                        mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
-                ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
-                DEBUGLOG(5, "dstBuffer released");
-                mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
-                mtctx->jobs[wJobID].cSize = 0;   /* ensure this job slot is considered "not started" in future check */
-                mtctx->consumed += srcSize;
-                mtctx->produced += cSize;
-                mtctx->doneJobID++;
-        }   }
-
-        /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
-        if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
-        if (srcSize > srcConsumed) return 1;   /* current job not completely compressed */
-    }
-    if (mtctx->doneJobID < mtctx->nextJobID) return 1;   /* some more jobs ongoing */
-    if (mtctx->jobReady) return 1;      /* one job is ready to push, just not yet in the list */
-    if (mtctx->inBuff.filled > 0) return 1;   /* input is not empty, and still needs to be converted into a job */
-    mtctx->allJobsCompleted = mtctx->frameEnded;   /* all jobs are entirely flushed => if this one is last one, frame is completed */
-    if (end == ZSTD_e_end) return !mtctx->frameEnded;  /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
-    return 0;   /* internal buffers fully flushed */
-}
-
-/**
- * Returns the range of data used by the earliest job that is not yet complete.
- * If the data of the first job is broken up into two segments, we cover both
- * sections.
- */
-static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
-{
-    unsigned const firstJobID = mtctx->doneJobID;
-    unsigned const lastJobID = mtctx->nextJobID;
-    unsigned jobID;
-
-    for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
-        unsigned const wJobID = jobID & mtctx->jobIDMask;
-        size_t consumed;
-
-        ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
-        consumed = mtctx->jobs[wJobID].consumed;
-        ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
-
-        if (consumed < mtctx->jobs[wJobID].src.size) {
-            range_t range = mtctx->jobs[wJobID].prefix;
-            if (range.size == 0) {
-                /* Empty prefix */
-                range = mtctx->jobs[wJobID].src;
-            }
-            /* Job source in multiple segments not supported yet */
-            assert(range.start <= mtctx->jobs[wJobID].src.start);
-            return range;
-        }
-    }
-    return kNullRange;
-}
-
-/**
- * Returns non-zero iff buffer and range overlap.
- */
-static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
-{
-    BYTE const* const bufferStart = (BYTE const*)buffer.start;
-    BYTE const* const rangeStart = (BYTE const*)range.start;
-
-    if (rangeStart == NULL || bufferStart == NULL)
-        return 0;
-
-    {
-        BYTE const* const bufferEnd = bufferStart + buffer.capacity;
-        BYTE const* const rangeEnd = rangeStart + range.size;
-
-        /* Empty ranges cannot overlap */
-        if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-            return 0;
-
-        return bufferStart < rangeEnd && rangeStart < bufferEnd;
-    }
-}
-
-static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
-{
-    range_t extDict;
-    range_t prefix;
-
-    DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
-    extDict.start = window.dictBase + window.lowLimit;
-    extDict.size = window.dictLimit - window.lowLimit;
-
-    prefix.start = window.base + window.dictLimit;
-    prefix.size = window.nextSrc - (window.base + window.dictLimit);
-    DEBUGLOG(5, "extDict [0x%zx, 0x%zx)",
-                (size_t)extDict.start,
-                (size_t)extDict.start + extDict.size);
-    DEBUGLOG(5, "prefix  [0x%zx, 0x%zx)",
-                (size_t)prefix.start,
-                (size_t)prefix.start + prefix.size);
-
-    return ZSTDMT_isOverlapped(buffer, extDict)
-        || ZSTDMT_isOverlapped(buffer, prefix);
-}
-
-static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
-{
-    if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
-        ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
-        DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
-        DEBUGLOG(5, "source  [0x%zx, 0x%zx)",
-                    (size_t)buffer.start,
-                    (size_t)buffer.start + buffer.capacity);
-        ZSTD_PTHREAD_MUTEX_LOCK(mutex);
-        while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
-            DEBUGLOG(5, "Waiting for LDM to finish...");
-            ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
-        }
-        DEBUGLOG(6, "Done waiting for LDM to finish");
-        ZSTD_pthread_mutex_unlock(mutex);
-    }
-}
-
-/**
- * Attempts to set the inBuff to the next section to fill.
- * If any part of the new section is still in use we give up.
- * Returns non-zero if the buffer is filled.
- */
-static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
-{
-    range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
-    size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
-    size_t const target = mtctx->targetSectionSize;
-    buffer_t buffer;
-
-    DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
-    assert(mtctx->inBuff.buffer.start == NULL);
-    assert(mtctx->roundBuff.capacity >= target);
-
-    if (spaceLeft < target) {
-        /* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
-         * Simply copy the prefix to the beginning in that case.
-         */
-        BYTE* const start = (BYTE*)mtctx->roundBuff.buffer;
-        size_t const prefixSize = mtctx->inBuff.prefix.size;
-
-        buffer.start = start;
-        buffer.capacity = prefixSize;
-        if (ZSTDMT_isOverlapped(buffer, inUse)) {
-            DEBUGLOG(5, "Waiting for buffer...");
-            return 0;
-        }
-        ZSTDMT_waitForLdmComplete(mtctx, buffer);
-        ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize);
-        mtctx->inBuff.prefix.start = start;
-        mtctx->roundBuff.pos = prefixSize;
-    }
-    buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
-    buffer.capacity = target;
-
-    if (ZSTDMT_isOverlapped(buffer, inUse)) {
-        DEBUGLOG(5, "Waiting for buffer...");
-        return 0;
-    }
-    assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
-
-    ZSTDMT_waitForLdmComplete(mtctx, buffer);
-
-    DEBUGLOG(5, "Using prefix range [%zx, %zx)",
-                (size_t)mtctx->inBuff.prefix.start,
-                (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size);
-    DEBUGLOG(5, "Using source range [%zx, %zx)",
-                (size_t)buffer.start,
-                (size_t)buffer.start + buffer.capacity);
-
-
-    mtctx->inBuff.buffer = buffer;
-    mtctx->inBuff.filled = 0;
-    assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity);
-    return 1;
-}
-
-typedef struct {
-  size_t toLoad;  /* The number of bytes to load from the input. */
-  int flush;      /* Boolean declaring if we must flush because we found a synchronization point. */
-} syncPoint_t;
-
-/**
- * Searches through the input for a synchronization point. If one is found, we
- * will instruct the caller to flush, and return the number of bytes to load.
- * Otherwise, we will load as many bytes as possible and instruct the caller
- * to continue as normal.
- */
-static syncPoint_t
-findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
-{
-    BYTE const* const istart = (BYTE const*)input.src + input.pos;
-    U64 const primePower = mtctx->rsync.primePower;
-    U64 const hitMask = mtctx->rsync.hitMask;
-
-    syncPoint_t syncPoint;
-    U64 hash;
-    BYTE const* prev;
-    size_t pos;
-
-    syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
-    syncPoint.flush = 0;
-    if (!mtctx->params.rsyncable)
-        /* Rsync is disabled. */
-        return syncPoint;
-    if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
-        /* We don't emit synchronization points if it would produce too small blocks.
-         * We don't have enough input to find a synchronization point, so don't look.
-         */
-        return syncPoint;
-    if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
-        /* Not enough to compute the hash.
-         * We will miss any synchronization points in this RSYNC_LENGTH byte
-         * window. However, since it depends only in the internal buffers, if the
-         * state is already synchronized, we will remain synchronized.
-         * Additionally, the probability that we miss a synchronization point is
-         * low: RSYNC_LENGTH / targetSectionSize.
-         */
-        return syncPoint;
-    /* Initialize the loop variables. */
-    if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
-        /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
-         * because they can't possibly be a sync point. So we can start
-         * part way through the input buffer.
-         */
-        pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
-        if (pos >= RSYNC_LENGTH) {
-            prev = istart + pos - RSYNC_LENGTH;
-            hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
-        } else {
-            assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
-            prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
-            hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
-            hash = ZSTD_rollingHash_append(hash, istart, pos);
-        }
-    } else {
-        /* We have enough bytes buffered to initialize the hash,
-         * and are have processed enough bytes to find a sync point.
-         * Start scanning at the beginning of the input.
-         */
-        assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
-        assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
-        pos = 0;
-        prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
-        hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
-        if ((hash & hitMask) == hitMask) {
-            /* We're already at a sync point so don't load any more until
-             * we're able to flush this sync point.
-             * This likely happened because the job table was full so we
-             * couldn't add our job.
-             */
-            syncPoint.toLoad = 0;
-            syncPoint.flush = 1;
-            return syncPoint;
-        }
-    }
-    /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
-     * through the input. If we hit a synchronization point, then cut the
-     * job off, and tell the compressor to flush the job. Otherwise, load
-     * all the bytes and continue as normal.
-     * If we go too long without a synchronization point (targetSectionSize)
-     * then a block will be emitted anyways, but this is okay, since if we
-     * are already synchronized we will remain synchronized.
-     */
-    for (; pos < syncPoint.toLoad; ++pos) {
-        BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-        assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
-        hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
-        assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
-        if ((hash & hitMask) == hitMask) {
-            syncPoint.toLoad = pos + 1;
-            syncPoint.flush = 1;
-            break;
-        }
-    }
-    return syncPoint;
-}
-
-size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
-{
-    size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
-    if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
-    return hintInSize;
-}
-
-/** ZSTDMT_compressStream_generic() :
- *  internal use only - exposed to be invoked from zstd_compress.c
- *  assumption : output and input are valid (pos <= size)
- * @return : minimum amount of data remaining to flush, 0 if none */
-size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
-                                     ZSTD_outBuffer* output,
-                                     ZSTD_inBuffer* input,
-                                     ZSTD_EndDirective endOp)
-{
-    unsigned forwardInputProgress = 0;
-    DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
-                (U32)endOp, (U32)(input->size - input->pos));
-    assert(output->pos <= output->size);
-    assert(input->pos  <= input->size);
-
-    if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
-        /* current frame being ended. Only flush/end are allowed */
-        return ERROR(stage_wrong);
-    }
-
-    /* fill input buffer */
-    if ( (!mtctx->jobReady)
-      && (input->size > input->pos) ) {   /* support NULL input */
-        if (mtctx->inBuff.buffer.start == NULL) {
-            assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */
-            if (!ZSTDMT_tryGetInputRange(mtctx)) {
-                /* It is only possible for this operation to fail if there are
-                 * still compression jobs ongoing.
-                 */
-                DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed");
-                assert(mtctx->doneJobID != mtctx->nextJobID);
-            } else
-                DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
-        }
-        if (mtctx->inBuff.buffer.start != NULL) {
-            syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
-            if (syncPoint.flush && endOp == ZSTD_e_continue) {
-                endOp = ZSTD_e_flush;
-            }
-            assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
-            DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
-                        (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
-            ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
-            input->pos += syncPoint.toLoad;
-            mtctx->inBuff.filled += syncPoint.toLoad;
-            forwardInputProgress = syncPoint.toLoad>0;
-        }
-    }
-    if ((input->pos < input->size) && (endOp == ZSTD_e_end)) {
-        /* Can't end yet because the input is not fully consumed.
-            * We are in one of these cases:
-            * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job.
-            * - We filled the input buffer: flush this job but don't end the frame.
-            * - We hit a synchronization point: flush this job but don't end the frame.
-            */
-        assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable);
-        endOp = ZSTD_e_flush;
-    }
-
-    if ( (mtctx->jobReady)
-      || (mtctx->inBuff.filled >= mtctx->targetSectionSize)  /* filled enough : let's compress */
-      || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0))  /* something to flush : let's go */
-      || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
-        size_t const jobSize = mtctx->inBuff.filled;
-        assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
-        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
-    }
-
-    /* check for potential compressed data ready to be flushed */
-    {   size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
-        if (input->pos < input->size) return MAX(remainingToFlush, 1);  /* input not consumed : do not end flush yet */
-        DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush);
-        return remainingToFlush;
-    }
-}
diff --git a/dep/zstd/lib/compress/zstdmt_compress.h b/dep/zstd/lib/compress/zstdmt_compress.h
deleted file mode 100644
index 271eb1ac7..000000000
--- a/dep/zstd/lib/compress/zstdmt_compress.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
- #ifndef ZSTDMT_COMPRESS_H
- #define ZSTDMT_COMPRESS_H
-
- #if defined (__cplusplus)
- extern "C" {
- #endif
-
-
-/* Note : This is an internal API.
- *        These APIs used to be exposed with ZSTDLIB_API,
- *        because it used to be the only way to invoke MT compression.
- *        Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
- *
- *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
- *        otherwise ZSTDMT_createCCtx*() will fail.
- */
-
-/* ===   Dependencies   === */
-#include "../common/zstd_deps.h"   /* size_t */
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
-#include "../zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
-
-
-/* ===   Constants   === */
-#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
-#  define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
-#endif
-#ifndef ZSTDMT_JOBSIZE_MIN   /* a different value can be selected at compile time */
-#  define ZSTDMT_JOBSIZE_MIN (512 KB)
-#endif
-#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
-#define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
-
-
-/* ========================================================
- * ===  Private interface, for use by ZSTD_compress.c   ===
- * ===  Not exposed in libzstd. Never invoke directly   ===
- * ======================================================== */
-
-/* ===   Memory management   === */
-typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
-/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
-                                        ZSTD_customMem cMem,
-					ZSTD_threadPool *pool);
-size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
-
-size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
-
-/* ===   Streaming functions   === */
-
-size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
-
-/*! ZSTDMT_initCStream_internal() :
- *  Private use only. Init streaming operation.
- *  expects params to be valid.
- *  must receive dict, or cdict, or none, but not both.
- *  mtctx can be freshly constructed or reused from a prior compression.
- *  If mtctx is reused, memory allocations from the prior compression may not be freed,
- *  even if they are not needed for the current compression.
- *  @return : 0, or an error code */
-size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
-                    const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
-                    const ZSTD_CDict* cdict,
-                    ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
-
-/*! ZSTDMT_compressStream_generic() :
- *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
- *  depending on flush directive.
- * @return : minimum amount of data still to be flushed
- *           0 if fully flushed
- *           or an error code
- *  note : needs to be init using any ZSTD_initCStream*() variant */
-size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
-                                     ZSTD_outBuffer* output,
-                                     ZSTD_inBuffer* input,
-                                     ZSTD_EndDirective endOp);
-
- /*! ZSTDMT_toFlushNow()
-  *  Tell how many bytes are ready to be flushed immediately.
-  *  Probe the oldest active job (not yet entirely flushed) and check its output buffer.
-  *  If return 0, it means there is no active job,
-  *  or, it means oldest job is still active, but everything produced has been flushed so far,
-  *  therefore flushing is limited by speed of oldest job. */
-size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
-
-/*! ZSTDMT_updateCParams_whileCompressing() :
- *  Updates only a selected set of compression parameters, to remain compatible with current frame.
- *  New parameters will be applied to next compression job. */
-void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
-
-/*! ZSTDMT_getFrameProgression():
- *  tells how much data has been consumed (input) and produced (output) for current frame.
- *  able to count progression inside worker threads.
- */
-ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif   /* ZSTDMT_COMPRESS_H */
diff --git a/dep/zstd/lib/decompress/huf_decompress.c b/dep/zstd/lib/decompress/huf_decompress.c
deleted file mode 100644
index 202718825..000000000
--- a/dep/zstd/lib/decompress/huf_decompress.c
+++ /dev/null
@@ -1,1889 +0,0 @@
-/* ******************************************************************
- * huff0 huffman decoder,
- * part of Finite State Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
- *
- *  You can contact the author at :
- *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
-****************************************************************** */
-
-/* **************************************************************
-*  Dependencies
-****************************************************************/
-#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
-#include "../common/compiler.h"
-#include "../common/bitstream.h"  /* BIT_* */
-#include "../common/fse.h"        /* to compress headers */
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "../common/error_private.h"
-#include "../common/zstd_internal.h"
-
-/* **************************************************************
-*  Constants
-****************************************************************/
-
-#define HUF_DECODER_FAST_TABLELOG 11
-
-/* **************************************************************
-*  Macros
-****************************************************************/
-
-/* These two optional macros force the use one way or another of the two
- * Huffman decompression implementations. You can't force in both directions
- * at the same time.
- */
-#if defined(HUF_FORCE_DECOMPRESS_X1) && \
-    defined(HUF_FORCE_DECOMPRESS_X2)
-#error "Cannot force the use of the X1 and X2 decoders at the same time!"
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
-# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
-#else
-# define HUF_ASM_X86_64_BMI2_ATTRS
-#endif
-
-#ifdef __cplusplus
-# define HUF_EXTERN_C extern "C"
-#else
-# define HUF_EXTERN_C
-#endif
-#define HUF_ASM_DECL HUF_EXTERN_C
-
-#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
-# define HUF_NEED_BMI2_FUNCTION 1
-#else
-# define HUF_NEED_BMI2_FUNCTION 0
-#endif
-
-#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
-# define HUF_NEED_DEFAULT_FUNCTION 1
-#else
-# define HUF_NEED_DEFAULT_FUNCTION 0
-#endif
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_isError ERR_isError
-
-
-/* **************************************************************
-*  Byte alignment for workSpace management
-****************************************************************/
-#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
-#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
-
-
-/* **************************************************************
-*  BMI2 Variant Wrappers
-****************************************************************/
-#if DYNAMIC_BMI2
-
-#define HUF_DGEN(fn)                                                        \
-                                                                            \
-    static size_t fn##_default(                                             \
-                  void* dst,  size_t dstSize,                               \
-            const void* cSrc, size_t cSrcSize,                              \
-            const HUF_DTable* DTable)                                       \
-    {                                                                       \
-        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
-    }                                                                       \
-                                                                            \
-    static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
-                  void* dst,  size_t dstSize,                               \
-            const void* cSrc, size_t cSrcSize,                              \
-            const HUF_DTable* DTable)                                       \
-    {                                                                       \
-        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
-    }                                                                       \
-                                                                            \
-    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
-                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
-    {                                                                       \
-        if (bmi2) {                                                         \
-            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
-        }                                                                   \
-        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
-    }
-
-#else
-
-#define HUF_DGEN(fn)                                                        \
-    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
-                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
-    {                                                                       \
-        (void)bmi2;                                                         \
-        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
-    }
-
-#endif
-
-
-/*-***************************/
-/*  generic DTableDesc       */
-/*-***************************/
-typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
-
-static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
-{
-    DTableDesc dtd;
-    ZSTD_memcpy(&dtd, table, sizeof(dtd));
-    return dtd;
-}
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
-static size_t HUF_initDStream(BYTE const* ip) {
-    BYTE const lastByte = ip[7];
-    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-    size_t const value = MEM_readLEST(ip) | 1;
-    assert(bitsConsumed <= 8);
-    return value << bitsConsumed;
-}
-typedef struct {
-    BYTE const* ip[4];
-    BYTE* op[4];
-    U64 bits[4];
-    void const* dt;
-    BYTE const* ilimit;
-    BYTE* oend;
-    BYTE const* iend[4];
-} HUF_DecompressAsmArgs;
-
-/**
- * Initializes args for the asm decoding loop.
- * @returns 0 on success
- *          1 if the fallback implementation should be used.
- *          Or an error code on failure.
- */
-static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
-{
-    void const* dt = DTable + 1;
-    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
-
-    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
-
-    BYTE* const oend = (BYTE*)dst + dstSize;
-
-    /* The following condition is false on x32 platform,
-     * but HUF_asm is not compatible with this ABI */
-    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
-
-    /* strict minimum : jump table + 1 byte per stream */
-    if (srcSize < 10)
-        return ERROR(corruption_detected);
-
-    /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
-     * If table log is not correct at this point, fallback to the old decoder.
-     * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
-     */
-    if (dtLog != HUF_DECODER_FAST_TABLELOG)
-        return 1;
-
-    /* Read the jump table. */
-    {
-        const BYTE* const istart = (const BYTE*)src;
-        size_t const length1 = MEM_readLE16(istart);
-        size_t const length2 = MEM_readLE16(istart+2);
-        size_t const length3 = MEM_readLE16(istart+4);
-        size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
-        args->iend[0] = istart + 6;  /* jumpTable */
-        args->iend[1] = args->iend[0] + length1;
-        args->iend[2] = args->iend[1] + length2;
-        args->iend[3] = args->iend[2] + length3;
-
-        /* HUF_initDStream() requires this, and this small of an input
-         * won't benefit from the ASM loop anyways.
-         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
-         * starts.
-         */
-        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
-            return 1;
-        if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
-    }
-    /* ip[] contains the position that is currently loaded into bits[]. */
-    args->ip[0] = args->iend[1] - sizeof(U64);
-    args->ip[1] = args->iend[2] - sizeof(U64);
-    args->ip[2] = args->iend[3] - sizeof(U64);
-    args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
-
-    /* op[] contains the output pointers. */
-    args->op[0] = (BYTE*)dst;
-    args->op[1] = args->op[0] + (dstSize+3)/4;
-    args->op[2] = args->op[1] + (dstSize+3)/4;
-    args->op[3] = args->op[2] + (dstSize+3)/4;
-
-    /* No point to call the ASM loop for tiny outputs. */
-    if (args->op[3] >= oend)
-        return 1;
-
-    /* bits[] is the bit container.
-        * It is read from the MSB down to the LSB.
-        * It is shifted left as it is read, and zeros are
-        * shifted in. After the lowest valid bit a 1 is
-        * set, so that CountTrailingZeros(bits[]) can be used
-        * to count how many bits we've consumed.
-        */
-    args->bits[0] = HUF_initDStream(args->ip[0]);
-    args->bits[1] = HUF_initDStream(args->ip[1]);
-    args->bits[2] = HUF_initDStream(args->ip[2]);
-    args->bits[3] = HUF_initDStream(args->ip[3]);
-
-    /* If ip[] >= ilimit, it is guaranteed to be safe to
-        * reload bits[]. It may be beyond its section, but is
-        * guaranteed to be valid (>= istart).
-        */
-    args->ilimit = ilimit;
-
-    args->oend = oend;
-    args->dt = dt;
-
-    return 0;
-}
-
-static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
-{
-    /* Validate that we haven't overwritten. */
-    if (args->op[stream] > segmentEnd)
-        return ERROR(corruption_detected);
-    /* Validate that we haven't read beyond iend[].
-        * Note that ip[] may be < iend[] because the MSB is
-        * the next bit to read, and we may have consumed 100%
-        * of the stream, so down to iend[i] - 8 is valid.
-        */
-    if (args->ip[stream] < args->iend[stream] - 8)
-        return ERROR(corruption_detected);
-
-    /* Construct the BIT_DStream_t. */
-    bit->bitContainer = MEM_readLE64(args->ip[stream]);
-    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
-    bit->start = (const char*)args->iend[0];
-    bit->limitPtr = bit->start + sizeof(size_t);
-    bit->ptr = (const char*)args->ip[stream];
-
-    return 0;
-}
-#endif
-
-
-#ifndef HUF_FORCE_DECOMPRESS_X2
-
-/*-***************************/
-/*  single-symbol decoding   */
-/*-***************************/
-typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decoding */
-
-/**
- * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
- * a time.
- */
-static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
-    U64 D4;
-    if (MEM_isLittleEndian()) {
-        D4 = (symbol << 8) + nbBits;
-    } else {
-        D4 = symbol + (nbBits << 8);
-    }
-    D4 *= 0x0001000100010001ULL;
-    return D4;
-}
-
-/**
- * Increase the tableLog to targetTableLog and rescales the stats.
- * If tableLog > targetTableLog this is a no-op.
- * @returns New tableLog
- */
-static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
-{
-    if (tableLog > targetTableLog)
-        return tableLog;
-    if (tableLog < targetTableLog) {
-        U32 const scale = targetTableLog - tableLog;
-        U32 s;
-        /* Increase the weight for all non-zero probability symbols by scale. */
-        for (s = 0; s < nbSymbols; ++s) {
-            huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
-        }
-        /* Update rankVal to reflect the new weights.
-         * All weights except 0 get moved to weight + scale.
-         * Weights [1, scale] are empty.
-         */
-        for (s = targetTableLog; s > scale; --s) {
-            rankVal[s] = rankVal[s - scale];
-        }
-        for (s = scale; s > 0; --s) {
-            rankVal[s] = 0;
-        }
-    }
-    return targetTableLog;
-}
-
-typedef struct {
-        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
-        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
-        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
-        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
-} HUF_ReadDTableX1_Workspace;
-
-
-size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
-{
-    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
-}
-
-size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
-{
-    U32 tableLog = 0;
-    U32 nbSymbols = 0;
-    size_t iSize;
-    void* const dtPtr = DTable + 1;
-    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
-    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
-
-    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
-    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
-
-    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
-
-    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
-    if (HUF_isError(iSize)) return iSize;
-
-
-    /* Table header */
-    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
-        U32 const maxTableLog = dtd.maxTableLog + 1;
-        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
-        tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
-        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
-        dtd.tableType = 0;
-        dtd.tableLog = (BYTE)tableLog;
-        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
-    }
-
-    /* Compute symbols and rankStart given rankVal:
-     *
-     * rankVal already contains the number of values of each weight.
-     *
-     * symbols contains the symbols ordered by weight. First are the rankVal[0]
-     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
-     * symbols[0] is filled (but unused) to avoid a branch.
-     *
-     * rankStart contains the offset where each rank belongs in the DTable.
-     * rankStart[0] is not filled because there are no entries in the table for
-     * weight 0.
-     */
-    {
-        int n;
-        int nextRankStart = 0;
-        int const unroll = 4;
-        int const nLimit = (int)nbSymbols - unroll + 1;
-        for (n=0; n<(int)tableLog+1; n++) {
-            U32 const curr = nextRankStart;
-            nextRankStart += wksp->rankVal[n];
-            wksp->rankStart[n] = curr;
-        }
-        for (n=0; n < nLimit; n += unroll) {
-            int u;
-            for (u=0; u < unroll; ++u) {
-                size_t const w = wksp->huffWeight[n+u];
-                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
-            }
-        }
-        for (; n < (int)nbSymbols; ++n) {
-            size_t const w = wksp->huffWeight[n];
-            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
-        }
-    }
-
-    /* fill DTable
-     * We fill all entries of each weight in order.
-     * That way length is a constant for each iteration of the outer loop.
-     * We can switch based on the length to a different inner loop which is
-     * optimized for that particular case.
-     */
-    {
-        U32 w;
-        int symbol=wksp->rankVal[0];
-        int rankStart=0;
-        for (w=1; w<tableLog+1; ++w) {
-            int const symbolCount = wksp->rankVal[w];
-            int const length = (1 << w) >> 1;
-            int uStart = rankStart;
-            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
-            int s;
-            int u;
-            switch (length) {
-            case 1:
-                for (s=0; s<symbolCount; ++s) {
-                    HUF_DEltX1 D;
-                    D.byte = wksp->symbols[symbol + s];
-                    D.nbBits = nbBits;
-                    dt[uStart] = D;
-                    uStart += 1;
-                }
-                break;
-            case 2:
-                for (s=0; s<symbolCount; ++s) {
-                    HUF_DEltX1 D;
-                    D.byte = wksp->symbols[symbol + s];
-                    D.nbBits = nbBits;
-                    dt[uStart+0] = D;
-                    dt[uStart+1] = D;
-                    uStart += 2;
-                }
-                break;
-            case 4:
-                for (s=0; s<symbolCount; ++s) {
-                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
-                    MEM_write64(dt + uStart, D4);
-                    uStart += 4;
-                }
-                break;
-            case 8:
-                for (s=0; s<symbolCount; ++s) {
-                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
-                    MEM_write64(dt + uStart, D4);
-                    MEM_write64(dt + uStart + 4, D4);
-                    uStart += 8;
-                }
-                break;
-            default:
-                for (s=0; s<symbolCount; ++s) {
-                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
-                    for (u=0; u < length; u += 16) {
-                        MEM_write64(dt + uStart + u + 0, D4);
-                        MEM_write64(dt + uStart + u + 4, D4);
-                        MEM_write64(dt + uStart + u + 8, D4);
-                        MEM_write64(dt + uStart + u + 12, D4);
-                    }
-                    assert(u == length);
-                    uStart += length;
-                }
-                break;
-            }
-            symbol += symbolCount;
-            rankStart += symbolCount * length;
-        }
-    }
-    return iSize;
-}
-
-FORCE_INLINE_TEMPLATE BYTE
-HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
-    BYTE const c = dt[val].byte;
-    BIT_skipBits(Dstream, dt[val].nbBits);
-    return c;
-}
-
-#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
-    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-
-HINT_INLINE size_t
-HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
-{
-    BYTE* const pStart = p;
-
-    /* up to 4 symbols at a time */
-    if ((pEnd - p) > 3) {
-        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
-            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
-            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
-            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
-            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
-        }
-    } else {
-        BIT_reloadDStream(bitDPtr);
-    }
-
-    /* [0-3] symbols remaining */
-    if (MEM_32bits())
-        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
-            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
-
-    /* no more data to retrieve from bitstream, no need to reload */
-    while (p < pEnd)
-        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
-
-    return pEnd-pStart;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X1_usingDTable_internal_body(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + dstSize;
-    const void* dtPtr = DTable + 1;
-    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
-    BIT_DStream_t bitD;
-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
-    U32 const dtLog = dtd.tableLog;
-
-    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
-
-    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
-
-    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
-    return dstSize;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X1_usingDTable_internal_body(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    /* Check */
-    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
-
-    {   const BYTE* const istart = (const BYTE*) cSrc;
-        BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        BYTE* const olimit = oend - 3;
-        const void* const dtPtr = DTable + 1;
-        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
-
-        /* Init */
-        BIT_DStream_t bitD1;
-        BIT_DStream_t bitD2;
-        BIT_DStream_t bitD3;
-        BIT_DStream_t bitD4;
-        size_t const length1 = MEM_readLE16(istart);
-        size_t const length2 = MEM_readLE16(istart+2);
-        size_t const length3 = MEM_readLE16(istart+4);
-        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-        const BYTE* const istart1 = istart + 6;  /* jumpTable */
-        const BYTE* const istart2 = istart1 + length1;
-        const BYTE* const istart3 = istart2 + length2;
-        const BYTE* const istart4 = istart3 + length3;
-        const size_t segmentSize = (dstSize+3) / 4;
-        BYTE* const opStart2 = ostart + segmentSize;
-        BYTE* const opStart3 = opStart2 + segmentSize;
-        BYTE* const opStart4 = opStart3 + segmentSize;
-        BYTE* op1 = ostart;
-        BYTE* op2 = opStart2;
-        BYTE* op3 = opStart3;
-        BYTE* op4 = opStart4;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        U32 const dtLog = dtd.tableLog;
-        U32 endSignal = 1;
-
-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
-
-        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
-        if ((size_t)(oend - op4) >= sizeof(size_t)) {
-            for ( ; (endSignal) & (op4 < olimit) ; ) {
-                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
-                HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
-                HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
-                HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
-                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
-                HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
-                HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
-                HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
-                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
-                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
-                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
-                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
-            }
-        }
-
-        /* check corruption */
-        /* note : should not be necessary : op# advance in lock step, and we control op4.
-         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
-        if (op1 > opStart2) return ERROR(corruption_detected);
-        if (op2 > opStart3) return ERROR(corruption_detected);
-        if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 supposed already verified within main loop */
-
-        /* finish bitStreams one by one */
-        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
-        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
-        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
-        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
-
-        /* check */
-        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-          if (!endCheck) return ERROR(corruption_detected); }
-
-        /* decoded size */
-        return dstSize;
-    }
-}
-
-#if HUF_NEED_BMI2_FUNCTION
-static BMI2_TARGET_ATTRIBUTE
-size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable) {
-    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-#endif
-
-#if HUF_NEED_DEFAULT_FUNCTION
-static
-size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable) {
-    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
-HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
-
-static HUF_ASM_X86_64_BMI2_ATTRS
-size_t
-HUF_decompress4X1_usingDTable_internal_bmi2_asm(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
-    HUF_DecompressAsmArgs args;
-    {
-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-        FORWARD_IF_ERROR(ret, "Failed to init asm args");
-        if (ret != 0)
-            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-    }
-
-    assert(args.ip[0] >= args.ilimit);
-    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
-
-    /* Our loop guarantees that ip[] >= ilimit and that we haven't
-    * overwritten any op[].
-    */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
-    assert(args.op[3] <= oend);
-    (void)iend;
-
-    /* finish bit streams one by one. */
-    {
-        size_t const segmentSize = (dstSize+3) / 4;
-        BYTE* segmentEnd = (BYTE*)dst;
-        int i;
-        for (i = 0; i < 4; ++i) {
-            BIT_DStream_t bit;
-            if (segmentSize <= (size_t)(oend - segmentEnd))
-                segmentEnd += segmentSize;
-            else
-                segmentEnd = oend;
-            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
-            /* Decompress and validate that we've produced exactly the expected length. */
-            args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
-            if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
-        }
-    }
-
-    /* decoded size */
-    return dstSize;
-}
-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
-
-typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
-                                               const void *cSrc,
-                                               size_t cSrcSize,
-                                               const HUF_DTable *DTable);
-
-HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
-
-static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-# if ZSTD_ENABLE_ASM_X86_64_BMI2
-        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
-# else
-        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-# endif
-    }
-#else
-    (void)bmi2;
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
-    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
-#else
-    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
-#endif
-}
-
-
-size_t HUF_decompress1X1_usingDTable(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    DTableDesc dtd = HUF_getDTableDesc(DTable);
-    if (dtd.tableType != 0) return ERROR(GENERIC);
-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-}
-
-size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize)
-{
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize; cSrcSize -= hSize;
-
-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
-}
-
-
-size_t HUF_decompress4X1_usingDTable(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    DTableDesc dtd = HUF_getDTableDesc(DTable);
-    if (dtd.tableType != 0) return ERROR(GENERIC);
-    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-}
-
-static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize, int bmi2)
-{
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize; cSrcSize -= hSize;
-
-    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
-}
-
-size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize)
-{
-    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
-}
-
-
-#endif /* HUF_FORCE_DECOMPRESS_X2 */
-
-
-#ifndef HUF_FORCE_DECOMPRESS_X1
-
-/* *************************/
-/* double-symbols decoding */
-/* *************************/
-
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
-typedef struct { BYTE symbol; } sortedSymbol_t;
-typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
-typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
-
-/**
- * Constructs a HUF_DEltX2 in a U32.
- */
-static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
-{
-    U32 seq;
-    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
-    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
-    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
-    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
-    if (MEM_isLittleEndian()) {
-        seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
-        return seq + (nbBits << 16) + ((U32)level << 24);
-    } else {
-        seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
-        return (seq << 16) + (nbBits << 8) + (U32)level;
-    }
-}
-
-/**
- * Constructs a HUF_DEltX2.
- */
-static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
-{
-    HUF_DEltX2 DElt;
-    U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
-    DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
-    ZSTD_memcpy(&DElt, &val, sizeof(val));
-    return DElt;
-}
-
-/**
- * Constructs 2 HUF_DEltX2s and packs them into a U64.
- */
-static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
-{
-    U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
-    return (U64)DElt + ((U64)DElt << 32);
-}
-
-/**
- * Fills the DTable rank with all the symbols from [begin, end) that are each
- * nbBits long.
- *
- * @param DTableRank The start of the rank in the DTable.
- * @param begin The first symbol to fill (inclusive).
- * @param end The last symbol to fill (exclusive).
- * @param nbBits Each symbol is nbBits long.
- * @param tableLog The table log.
- * @param baseSeq If level == 1 { 0 } else { the first level symbol }
- * @param level The level in the table. Must be 1 or 2.
- */
-static void HUF_fillDTableX2ForWeight(
-    HUF_DEltX2* DTableRank,
-    sortedSymbol_t const* begin, sortedSymbol_t const* end,
-    U32 nbBits, U32 tableLog,
-    U16 baseSeq, int const level)
-{
-    U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
-    const sortedSymbol_t* ptr;
-    assert(level >= 1 && level <= 2);
-    switch (length) {
-    case 1:
-        for (ptr = begin; ptr != end; ++ptr) {
-            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
-            *DTableRank++ = DElt;
-        }
-        break;
-    case 2:
-        for (ptr = begin; ptr != end; ++ptr) {
-            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
-            DTableRank[0] = DElt;
-            DTableRank[1] = DElt;
-            DTableRank += 2;
-        }
-        break;
-    case 4:
-        for (ptr = begin; ptr != end; ++ptr) {
-            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
-            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
-            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
-            DTableRank += 4;
-        }
-        break;
-    case 8:
-        for (ptr = begin; ptr != end; ++ptr) {
-            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
-            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
-            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
-            ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
-            ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
-            DTableRank += 8;
-        }
-        break;
-    default:
-        for (ptr = begin; ptr != end; ++ptr) {
-            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
-            HUF_DEltX2* const DTableRankEnd = DTableRank + length;
-            for (; DTableRank != DTableRankEnd; DTableRank += 8) {
-                ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
-                ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
-                ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
-                ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
-            }
-        }
-        break;
-    }
-}
-
-/* HUF_fillDTableX2Level2() :
- * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
-                           const U32* rankVal, const int minWeight, const int maxWeight1,
-                           const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
-                           U32 nbBitsBaseline, U16 baseSeq)
-{
-    /* Fill skipped values (all positions up to rankVal[minWeight]).
-     * These are positions only get a single symbol because the combined weight
-     * is too large.
-     */
-    if (minWeight>1) {
-        U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
-        U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
-        int const skipSize = rankVal[minWeight];
-        assert(length > 1);
-        assert((U32)skipSize < length);
-        switch (length) {
-        case 2:
-            assert(skipSize == 1);
-            ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
-            break;
-        case 4:
-            assert(skipSize <= 4);
-            ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
-            ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
-            break;
-        default:
-            {
-                int i;
-                for (i = 0; i < skipSize; i += 8) {
-                    ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
-                    ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
-                    ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
-                    ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
-                }
-            }
-        }
-    }
-
-    /* Fill each of the second level symbols by weight. */
-    {
-        int w;
-        for (w = minWeight; w < maxWeight1; ++w) {
-            int const begin = rankStart[w];
-            int const end = rankStart[w+1];
-            U32 const nbBits = nbBitsBaseline - w;
-            U32 const totalBits = nbBits + consumedBits;
-            HUF_fillDTableX2ForWeight(
-                DTable + rankVal[w],
-                sortedSymbols + begin, sortedSymbols + end,
-                totalBits, targetLog,
-                baseSeq, /* level */ 2);
-        }
-    }
-}
-
-static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
-                           const sortedSymbol_t* sortedList,
-                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
-                           const U32 nbBitsBaseline)
-{
-    U32* const rankVal = rankValOrigin[0];
-    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
-    const U32 minBits  = nbBitsBaseline - maxWeight;
-    int w;
-    int const wEnd = (int)maxWeight + 1;
-
-    /* Fill DTable in order of weight. */
-    for (w = 1; w < wEnd; ++w) {
-        int const begin = (int)rankStart[w];
-        int const end = (int)rankStart[w+1];
-        U32 const nbBits = nbBitsBaseline - w;
-
-        if (targetLog-nbBits >= minBits) {
-            /* Enough room for a second symbol. */
-            int start = rankVal[w];
-            U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
-            int minWeight = nbBits + scaleLog;
-            int s;
-            if (minWeight < 1) minWeight = 1;
-            /* Fill the DTable for every symbol of weight w.
-             * These symbols get at least 1 second symbol.
-             */
-            for (s = begin; s != end; ++s) {
-                HUF_fillDTableX2Level2(
-                    DTable + start, targetLog, nbBits,
-                    rankValOrigin[nbBits], minWeight, wEnd,
-                    sortedList, rankStart,
-                    nbBitsBaseline, sortedList[s].symbol);
-                start += length;
-            }
-        } else {
-            /* Only a single symbol. */
-            HUF_fillDTableX2ForWeight(
-                DTable + rankVal[w],
-                sortedList + begin, sortedList + end,
-                nbBits, targetLog,
-                /* baseSeq */ 0, /* level */ 1);
-        }
-    }
-}
-
-typedef struct {
-    rankValCol_t rankVal[HUF_TABLELOG_MAX];
-    U32 rankStats[HUF_TABLELOG_MAX + 1];
-    U32 rankStart0[HUF_TABLELOG_MAX + 3];
-    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
-    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
-    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-} HUF_ReadDTableX2_Workspace;
-
-size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
-                       const void* src, size_t srcSize,
-                             void* workSpace, size_t wkspSize)
-{
-    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
-}
-
-size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
-                       const void* src, size_t srcSize,
-                             void* workSpace, size_t wkspSize, int bmi2)
-{
-    U32 tableLog, maxW, nbSymbols;
-    DTableDesc dtd = HUF_getDTableDesc(DTable);
-    U32 maxTableLog = dtd.maxTableLog;
-    size_t iSize;
-    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
-    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
-    U32 *rankStart;
-
-    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
-
-    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
-
-    rankStart = wksp->rankStart0 + 1;
-    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
-    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
-
-    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
-    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
-    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
-
-    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
-    if (HUF_isError(iSize)) return iSize;
-
-    /* check result */
-    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
-    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
-
-    /* find maxWeight */
-    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
-
-    /* Get start index of each weight */
-    {   U32 w, nextRankStart = 0;
-        for (w=1; w<maxW+1; w++) {
-            U32 curr = nextRankStart;
-            nextRankStart += wksp->rankStats[w];
-            rankStart[w] = curr;
-        }
-        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
-        rankStart[maxW+1] = nextRankStart;
-    }
-
-    /* sort symbols by weight */
-    {   U32 s;
-        for (s=0; s<nbSymbols; s++) {
-            U32 const w = wksp->weightList[s];
-            U32 const r = rankStart[w]++;
-            wksp->sortedSymbol[r].symbol = (BYTE)s;
-        }
-        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
-    }
-
-    /* Build rankVal */
-    {   U32* const rankVal0 = wksp->rankVal[0];
-        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
-            U32 nextRankVal = 0;
-            U32 w;
-            for (w=1; w<maxW+1; w++) {
-                U32 curr = nextRankVal;
-                nextRankVal += wksp->rankStats[w] << (w+rescale);
-                rankVal0[w] = curr;
-        }   }
-        {   U32 const minBits = tableLog+1 - maxW;
-            U32 consumed;
-            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-                U32* const rankValPtr = wksp->rankVal[consumed];
-                U32 w;
-                for (w = 1; w < maxW+1; w++) {
-                    rankValPtr[w] = rankVal0[w] >> consumed;
-    }   }   }   }
-
-    HUF_fillDTableX2(dt, maxTableLog,
-                   wksp->sortedSymbol,
-                   wksp->rankStart0, wksp->rankVal, maxW,
-                   tableLog+1);
-
-    dtd.tableLog = (BYTE)maxTableLog;
-    dtd.tableType = 1;
-    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
-    return iSize;
-}
-
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    ZSTD_memcpy(op, &dt[val].sequence, 2);
-    BIT_skipBits(DStream, dt[val].nbBits);
-    return dt[val].length;
-}
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    ZSTD_memcpy(op, &dt[val].sequence, 1);
-    if (dt[val].length==1) {
-        BIT_skipBits(DStream, dt[val].nbBits);
-    } else {
-        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
-            BIT_skipBits(DStream, dt[val].nbBits);
-            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
-        }
-    }
-    return 1;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
-
-HINT_INLINE size_t
-HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
-                const HUF_DEltX2* const dt, const U32 dtLog)
-{
-    BYTE* const pStart = p;
-
-    /* up to 8 symbols at a time */
-    if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
-        if (dtLog <= 11 && MEM_64bits()) {
-            /* up to 10 symbols at a time */
-            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-            }
-        } else {
-            /* up to 8 symbols at a time */
-            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
-                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-            }
-        }
-    } else {
-        BIT_reloadDStream(bitDPtr);
-    }
-
-    /* closer to end : up to 2 symbols at a time */
-    if ((size_t)(pEnd - p) >= 2) {
-        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
-            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-        while (p <= pEnd-2)
-            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
-    }
-
-    if (p < pEnd)
-        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
-
-    return p-pStart;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X2_usingDTable_internal_body(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    BIT_DStream_t bitD;
-
-    /* Init */
-    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
-
-    /* decode */
-    {   BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
-        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
-    }
-
-    /* check */
-    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
-}
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X2_usingDTable_internal_body(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
-
-    {   const BYTE* const istart = (const BYTE*) cSrc;
-        BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        BYTE* const olimit = oend - (sizeof(size_t)-1);
-        const void* const dtPtr = DTable+1;
-        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
-
-        /* Init */
-        BIT_DStream_t bitD1;
-        BIT_DStream_t bitD2;
-        BIT_DStream_t bitD3;
-        BIT_DStream_t bitD4;
-        size_t const length1 = MEM_readLE16(istart);
-        size_t const length2 = MEM_readLE16(istart+2);
-        size_t const length3 = MEM_readLE16(istart+4);
-        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-        const BYTE* const istart1 = istart + 6;  /* jumpTable */
-        const BYTE* const istart2 = istart1 + length1;
-        const BYTE* const istart3 = istart2 + length2;
-        const BYTE* const istart4 = istart3 + length3;
-        size_t const segmentSize = (dstSize+3) / 4;
-        BYTE* const opStart2 = ostart + segmentSize;
-        BYTE* const opStart3 = opStart2 + segmentSize;
-        BYTE* const opStart4 = opStart3 + segmentSize;
-        BYTE* op1 = ostart;
-        BYTE* op2 = opStart2;
-        BYTE* op3 = opStart3;
-        BYTE* op4 = opStart4;
-        U32 endSignal = 1;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        U32 const dtLog = dtd.tableLog;
-
-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
-
-        /* 16-32 symbols per loop (4-8 symbols per stream) */
-        if ((size_t)(oend - op4) >= sizeof(size_t)) {
-            for ( ; (endSignal) & (op4 < olimit); ) {
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
-                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
-                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
-                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
-                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
-#else
-                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-                endSignal = (U32)LIKELY((U32)
-                            (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
-                        & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
-                        & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
-                        & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
-#endif
-            }
-        }
-
-        /* check corruption */
-        if (op1 > opStart2) return ERROR(corruption_detected);
-        if (op2 > opStart3) return ERROR(corruption_detected);
-        if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within main loop */
-
-        /* finish bitStreams one by one */
-        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-        /* check */
-        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-          if (!endCheck) return ERROR(corruption_detected); }
-
-        /* decoded size */
-        return dstSize;
-    }
-}
-
-#if HUF_NEED_BMI2_FUNCTION
-static BMI2_TARGET_ATTRIBUTE
-size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable) {
-    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-#endif
-
-#if HUF_NEED_DEFAULT_FUNCTION
-static
-size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable) {
-    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
-HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
-
-static HUF_ASM_X86_64_BMI2_ATTRS size_t
-HUF_decompress4X2_usingDTable_internal_bmi2_asm(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable) {
-    void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
-    HUF_DecompressAsmArgs args;
-    {
-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-        FORWARD_IF_ERROR(ret, "Failed to init asm args");
-        if (ret != 0)
-            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-    }
-
-    assert(args.ip[0] >= args.ilimit);
-    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
-
-    /* note : op4 already verified within main loop */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
-    assert(args.op[3] <= oend);
-    (void)iend;
-
-    /* finish bitStreams one by one */
-    {
-        size_t const segmentSize = (dstSize+3) / 4;
-        BYTE* segmentEnd = (BYTE*)dst;
-        int i;
-        for (i = 0; i < 4; ++i) {
-            BIT_DStream_t bit;
-            if (segmentSize <= (size_t)(oend - segmentEnd))
-                segmentEnd += segmentSize;
-            else
-                segmentEnd = oend;
-            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
-            args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
-            if (args.op[i] != segmentEnd)
-                return ERROR(corruption_detected);
-        }
-    }
-
-    /* decoded size */
-    return dstSize;
-}
-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
-
-static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
-                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-# if ZSTD_ENABLE_ASM_X86_64_BMI2
-        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
-# else
-        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
-# endif
-    }
-#else
-    (void)bmi2;
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
-    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
-#else
-    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
-#endif
-}
-
-HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
-
-size_t HUF_decompress1X2_usingDTable(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    DTableDesc dtd = HUF_getDTableDesc(DTable);
-    if (dtd.tableType != 1) return ERROR(GENERIC);
-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize)
-{
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
-                                               workSpace, wkspSize);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize; cSrcSize -= hSize;
-
-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
-}
-
-
-size_t HUF_decompress4X2_usingDTable(
-          void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    DTableDesc dtd = HUF_getDTableDesc(DTable);
-    if (dtd.tableType != 1) return ERROR(GENERIC);
-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-}
-
-static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize, int bmi2)
-{
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
-                                         workSpace, wkspSize);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize; cSrcSize -= hSize;
-
-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
-}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                   const void* cSrc, size_t cSrcSize,
-                                   void* workSpace, size_t wkspSize)
-{
-    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
-}
-
-
-#endif /* HUF_FORCE_DECOMPRESS_X1 */
-
-
-/* ***********************************/
-/* Universal decompression selectors */
-/* ***********************************/
-
-size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
-                                    const void* cSrc, size_t cSrcSize,
-                                    const HUF_DTable* DTable)
-{
-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-    (void)dtd;
-    assert(dtd.tableType == 0);
-    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-    (void)dtd;
-    assert(dtd.tableType == 1);
-    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#else
-    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
-                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#endif
-}
-
-size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
-                                    const void* cSrc, size_t cSrcSize,
-                                    const HUF_DTable* DTable)
-{
-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-    (void)dtd;
-    assert(dtd.tableType == 0);
-    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-    (void)dtd;
-    assert(dtd.tableType == 1);
-    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#else
-    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
-                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
-#endif
-}
-
-
-#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
-typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
-{
-    /* single, double, quad */
-    {{0,0}, {1,1}},  /* Q==0 : impossible */
-    {{0,0}, {1,1}},  /* Q==1 : impossible */
-    {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
-    {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
-    {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
-    {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
-    {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
-    {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
-    {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
-    {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
-    {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
-    {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
-    {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
-    {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
-    {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
-    {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
-};
-#endif
-
-/** HUF_selectDecoder() :
- *  Tells which decoder is likely to decode faster,
- *  based on a set of pre-computed metrics.
- * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
- *  Assumption : 0 < dstSize <= 128 KB */
-U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
-{
-    assert(dstSize > 0);
-    assert(dstSize <= 128*1024);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-    (void)dstSize;
-    (void)cSrcSize;
-    return 0;
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-    (void)dstSize;
-    (void)cSrcSize;
-    return 1;
-#else
-    /* decoder timing evaluation */
-    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
-        U32 const D256 = (U32)(dstSize >> 8);
-        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
-        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
-        DTime1 += DTime1 >> 5;  /* small advantage to algorithm using less memory, to reduce cache eviction */
-        return DTime1 < DTime0;
-    }
-#endif
-}
-
-
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
-                                     size_t dstSize, const void* cSrc,
-                                     size_t cSrcSize, void* workSpace,
-                                     size_t wkspSize)
-{
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize == 0) return ERROR(corruption_detected);
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
-#else
-        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
-                            cSrcSize, workSpace, wkspSize):
-                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
-#endif
-    }
-}
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
-                                  const void* cSrc, size_t cSrcSize,
-                                  void* workSpace, size_t wkspSize)
-{
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
-                                cSrcSize, workSpace, wkspSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
-                                cSrcSize, workSpace, wkspSize);
-#else
-        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
-                                cSrcSize, workSpace, wkspSize):
-                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
-                                cSrcSize, workSpace, wkspSize);
-#endif
-    }
-}
-
-
-size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
-{
-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-    (void)dtd;
-    assert(dtd.tableType == 0);
-    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-    (void)dtd;
-    assert(dtd.tableType == 1);
-    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#else
-    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
-                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#endif
-}
-
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
-{
-    const BYTE* ip = (const BYTE*) cSrc;
-
-    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-    if (HUF_isError(hSize)) return hSize;
-    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
-    ip += hSize; cSrcSize -= hSize;
-
-    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
-}
-#endif
-
-size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
-{
-    DTableDesc const dtd = HUF_getDTableDesc(DTable);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-    (void)dtd;
-    assert(dtd.tableType == 0);
-    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-    (void)dtd;
-    assert(dtd.tableType == 1);
-    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#else
-    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
-                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
-#endif
-}
-
-size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
-{
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize == 0) return ERROR(corruption_detected);
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-#else
-        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
-                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
-#endif
-    }
-}
-
-#ifndef ZSTD_NO_UNUSED_FUNCTIONS
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_readDTableX1_wksp(DTable, src, srcSize,
-                                 workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
-}
-#endif
-
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
-{
-  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-  return HUF_readDTableX2_wksp(DTable, src, srcSize,
-                               workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-#endif
-
-#ifndef HUF_FORCE_DECOMPRESS_X2
-size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-#endif
-
-#ifndef HUF_FORCE_DECOMPRESS_X1
-size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
-                              const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                       workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
-}
-#endif
-
-typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
-
-size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
-    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
-#endif
-
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
-#else
-        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
-#endif
-    }
-}
-
-size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    /* validation checks */
-    if (dstSize == 0) return ERROR(dstSize_tooSmall);
-    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
-    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
-    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
-
-    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-#if defined(HUF_FORCE_DECOMPRESS_X1)
-        (void)algoNb;
-        assert(algoNb == 0);
-        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
-#elif defined(HUF_FORCE_DECOMPRESS_X2)
-        (void)algoNb;
-        assert(algoNb == 1);
-        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
-#else
-        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
-                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
-#endif
-    }
-}
-
-size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                         workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
-                             const void* cSrc, size_t cSrcSize)
-{
-    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
-                                      workSpace, sizeof(workSpace));
-}
-#endif
diff --git a/dep/zstd/lib/decompress/huf_decompress_amd64.S b/dep/zstd/lib/decompress/huf_decompress_amd64.S
deleted file mode 100644
index 49589cb61..000000000
--- a/dep/zstd/lib/decompress/huf_decompress_amd64.S
+++ /dev/null
@@ -1,585 +0,0 @@
-/*
- * Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "../common/portability_macros.h"
-
-/* Stack marking
- * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
- */
-#if defined(__ELF__) && defined(__GNUC__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
-/* Calling convention:
- *
- * %rdi contains the first argument: HUF_DecompressAsmArgs*.
- * %rbp isn't maintained (no frame pointer).
- * %rsp contains the stack pointer that grows down.
- *      No red-zone is assumed, only addresses >= %rsp are used.
- * All register contents are preserved.
- *
- * TODO: Support Windows calling convention.
- */
-
-ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
-.global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
-.global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
-.global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
-.global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
-.text
-
-/* Sets up register mappings for clarity.
- * op[], bits[], dtable & ip[0] each get their own register.
- * ip[1,2,3] & olimit alias var[].
- * %rax is a scratch register.
- */
-
-#define op0    rsi
-#define op1    rbx
-#define op2    rcx
-#define op3    rdi
-
-#define ip0    r8
-#define ip1    r9
-#define ip2    r10
-#define ip3    r11
-
-#define bits0  rbp
-#define bits1  rdx
-#define bits2  r12
-#define bits3  r13
-#define dtable r14
-#define olimit r15
-
-/* var[] aliases ip[1,2,3] & olimit
- * ip[1,2,3] are saved every iteration.
- * olimit is only used in compute_olimit.
- */
-#define var0   r15
-#define var1   r9
-#define var2   r10
-#define var3   r11
-
-/* 32-bit var registers */
-#define vard0  r15d
-#define vard1  r9d
-#define vard2  r10d
-#define vard3  r11d
-
-/* Calls X(N) for each stream 0, 1, 2, 3. */
-#define FOR_EACH_STREAM(X) \
-    X(0);                  \
-    X(1);                  \
-    X(2);                  \
-    X(3)
-
-/* Calls X(N, idx) for each stream 0, 1, 2, 3. */
-#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \
-    X(0, idx);                             \
-    X(1, idx);                             \
-    X(2, idx);                             \
-    X(3, idx)
-
-/* Define both _HUF_* & HUF_* symbols because MacOS
- * C symbols are prefixed with '_' & Linux symbols aren't.
- */
-_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
-HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
-    /* Save all registers - even if they are callee saved for simplicity. */
-    push %rax
-    push %rbx
-    push %rcx
-    push %rdx
-    push %rbp
-    push %rsi
-    push %rdi
-    push %r8
-    push %r9
-    push %r10
-    push %r11
-    push %r12
-    push %r13
-    push %r14
-    push %r15
-
-    /* Read HUF_DecompressAsmArgs* args from %rax */
-    movq %rdi, %rax
-    movq  0(%rax), %ip0
-    movq  8(%rax), %ip1
-    movq 16(%rax), %ip2
-    movq 24(%rax), %ip3
-    movq 32(%rax), %op0
-    movq 40(%rax), %op1
-    movq 48(%rax), %op2
-    movq 56(%rax), %op3
-    movq 64(%rax), %bits0
-    movq 72(%rax), %bits1
-    movq 80(%rax), %bits2
-    movq 88(%rax), %bits3
-    movq 96(%rax), %dtable
-    push %rax      /* argument */
-    push 104(%rax) /* ilimit */
-    push 112(%rax) /* oend */
-    push %olimit   /* olimit space */
-
-    subq $24, %rsp
-
-.L_4X1_compute_olimit:
-    /* Computes how many iterations we can do safely
-     * %r15, %rax may be clobbered
-     * rbx, rdx must be saved
-     * op3 & ip0 mustn't be clobbered
-     */
-    movq %rbx, 0(%rsp)
-    movq %rdx, 8(%rsp)
-
-    movq 32(%rsp), %rax /* rax = oend */
-    subq %op3,    %rax  /* rax = oend - op3 */
-
-    /* r15 = (oend - op3) / 5 */
-    movabsq $-3689348814741910323, %rdx
-    mulq %rdx
-    movq %rdx, %r15
-    shrq $2, %r15
-
-    movq %ip0,     %rax /* rax = ip0 */
-    movq 40(%rsp), %rdx /* rdx = ilimit */
-    subq %rdx,     %rax /* rax = ip0 - ilimit */
-    movq %rax,     %rbx /* rbx = ip0 - ilimit */
-
-    /* rdx = (ip0 - ilimit) / 7 */
-    movabsq $2635249153387078803, %rdx
-    mulq %rdx
-    subq %rdx, %rbx
-    shrq %rbx
-    addq %rbx, %rdx
-    shrq $2, %rdx
-
-    /* r15 = min(%rdx, %r15) */
-    cmpq %rdx, %r15
-    cmova %rdx, %r15
-
-    /* r15 = r15 * 5 */
-    leaq (%r15, %r15, 4), %r15
-
-    /* olimit = op3 + r15 */
-    addq %op3, %olimit
-
-    movq 8(%rsp), %rdx
-    movq 0(%rsp), %rbx
-
-    /* If (op3 + 20 > olimit) */
-    movq %op3, %rax    /* rax = op3 */
-    addq $20,  %rax    /* rax = op3 + 20 */
-    cmpq %rax, %olimit /* op3 + 20 > olimit */
-    jb .L_4X1_exit
-
-    /* If (ip1 < ip0) go to exit */
-    cmpq %ip0, %ip1
-    jb .L_4X1_exit
-
-    /* If (ip2 < ip1) go to exit */
-    cmpq %ip1, %ip2
-    jb .L_4X1_exit
-
-    /* If (ip3 < ip2) go to exit */
-    cmpq %ip2, %ip3
-    jb .L_4X1_exit
-
-/* Reads top 11 bits from bits[n]
- * Loads dt[bits[n]] into var[n]
- */
-#define GET_NEXT_DELT(n)                \
-    movq $53, %var##n;                  \
-    shrxq %var##n, %bits##n, %var##n;   \
-    movzwl (%dtable,%var##n,2),%vard##n
-
-/* var[n] must contain the DTable entry computed with GET_NEXT_DELT
- * Moves var[n] to %rax
- * bits[n] <<= var[n] & 63
- * op[n][idx] = %rax >> 8
- * %ah is a way to access bits [8, 16) of %rax
- */
-#define DECODE_FROM_DELT(n, idx)       \
-    movq %var##n, %rax;                \
-    shlxq %var##n, %bits##n, %bits##n; \
-    movb %ah, idx(%op##n)
-
-/* Assumes GET_NEXT_DELT has been called.
- * Calls DECODE_FROM_DELT then GET_NEXT_DELT
- */
-#define DECODE_AND_GET_NEXT(n, idx) \
-    DECODE_FROM_DELT(n, idx);       \
-    GET_NEXT_DELT(n)                \
-
-/* // ctz & nbBytes is stored in bits[n]
- * // nbBits is stored in %rax
- * ctz  = CTZ[bits[n]]
- * nbBits  = ctz & 7
- * nbBytes = ctz >> 3
- * op[n]  += 5
- * ip[n]  -= nbBytes
- * // Note: x86-64 is little-endian ==> no bswap
- * bits[n] = MEM_readST(ip[n]) | 1
- * bits[n] <<= nbBits
- */
-#define RELOAD_BITS(n)             \
-    bsfq %bits##n, %bits##n;       \
-    movq %bits##n, %rax;           \
-    andq $7, %rax;                 \
-    shrq $3, %bits##n;             \
-    leaq 5(%op##n), %op##n;        \
-    subq %bits##n, %ip##n;         \
-    movq (%ip##n), %bits##n;       \
-    orq $1, %bits##n;              \
-    shlx %rax, %bits##n, %bits##n
-
-    /* Store clobbered variables on the stack */
-    movq %olimit, 24(%rsp)
-    movq %ip1, 0(%rsp)
-    movq %ip2, 8(%rsp)
-    movq %ip3, 16(%rsp)
-
-    /* Call GET_NEXT_DELT for each stream */
-    FOR_EACH_STREAM(GET_NEXT_DELT)
-
-    .p2align 6
-
-.L_4X1_loop_body:
-    /* Decode 5 symbols in each of the 4 streams (20 total)
-     * Must have called GET_NEXT_DELT for each stream
-     */
-    FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
-
-    /* Load ip[1,2,3] from stack (var[] aliases them)
-     * ip[] is needed for RELOAD_BITS
-     * Each will be stored back to the stack after RELOAD
-     */
-    movq 0(%rsp), %ip1
-    movq 8(%rsp), %ip2
-    movq 16(%rsp), %ip3
-
-    /* Reload each stream & fetch the next table entry
-     * to prepare for the next iteration
-     */
-    RELOAD_BITS(0)
-    GET_NEXT_DELT(0)
-
-    RELOAD_BITS(1)
-    movq %ip1, 0(%rsp)
-    GET_NEXT_DELT(1)
-
-    RELOAD_BITS(2)
-    movq %ip2, 8(%rsp)
-    GET_NEXT_DELT(2)
-
-    RELOAD_BITS(3)
-    movq %ip3, 16(%rsp)
-    GET_NEXT_DELT(3)
-
-    /* If op3 < olimit: continue the loop */
-    cmp %op3, 24(%rsp)
-    ja .L_4X1_loop_body
-
-    /* Reload ip[1,2,3] from stack */
-    movq 0(%rsp), %ip1
-    movq 8(%rsp), %ip2
-    movq 16(%rsp), %ip3
-
-    /* Re-compute olimit */
-    jmp .L_4X1_compute_olimit
-
-#undef GET_NEXT_DELT
-#undef DECODE_FROM_DELT
-#undef DECODE
-#undef RELOAD_BITS
-.L_4X1_exit:
-    addq $24, %rsp
-
-    /* Restore stack (oend & olimit) */
-    pop %rax /* olimit */
-    pop %rax /* oend */
-    pop %rax /* ilimit */
-    pop %rax /* arg */
-
-    /* Save ip / op / bits */
-    movq %ip0,  0(%rax)
-    movq %ip1,  8(%rax)
-    movq %ip2, 16(%rax)
-    movq %ip3, 24(%rax)
-    movq %op0, 32(%rax)
-    movq %op1, 40(%rax)
-    movq %op2, 48(%rax)
-    movq %op3, 56(%rax)
-    movq %bits0, 64(%rax)
-    movq %bits1, 72(%rax)
-    movq %bits2, 80(%rax)
-    movq %bits3, 88(%rax)
-
-    /* Restore registers */
-    pop %r15
-    pop %r14
-    pop %r13
-    pop %r12
-    pop %r11
-    pop %r10
-    pop %r9
-    pop %r8
-    pop %rdi
-    pop %rsi
-    pop %rbp
-    pop %rdx
-    pop %rcx
-    pop %rbx
-    pop %rax
-    ret
-
-_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
-HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
-    /* Save all registers - even if they are callee saved for simplicity. */
-    push %rax
-    push %rbx
-    push %rcx
-    push %rdx
-    push %rbp
-    push %rsi
-    push %rdi
-    push %r8
-    push %r9
-    push %r10
-    push %r11
-    push %r12
-    push %r13
-    push %r14
-    push %r15
-
-    movq %rdi, %rax
-    movq  0(%rax), %ip0
-    movq  8(%rax), %ip1
-    movq 16(%rax), %ip2
-    movq 24(%rax), %ip3
-    movq 32(%rax), %op0
-    movq 40(%rax), %op1
-    movq 48(%rax), %op2
-    movq 56(%rax), %op3
-    movq 64(%rax), %bits0
-    movq 72(%rax), %bits1
-    movq 80(%rax), %bits2
-    movq 88(%rax), %bits3
-    movq 96(%rax), %dtable
-    push %rax      /* argument */
-    push %rax      /* olimit */
-    push 104(%rax) /* ilimit */
-
-    movq 112(%rax), %rax
-    push %rax /* oend3 */
-
-    movq %op3, %rax
-    push %rax /* oend2 */
-
-    movq %op2, %rax
-    push %rax /* oend1 */
-
-    movq %op1, %rax
-    push %rax /* oend0 */
-
-    /* Scratch space */
-    subq $8, %rsp
-
-.L_4X2_compute_olimit:
-    /* Computes how many iterations we can do safely
-     * %r15, %rax may be clobbered
-     * rdx must be saved
-     * op[1,2,3,4] & ip0 mustn't be clobbered
-     */
-    movq %rdx, 0(%rsp)
-
-    /* We can consume up to 7 input bytes each iteration. */
-    movq %ip0,     %rax  /* rax = ip0 */
-    movq 40(%rsp), %rdx  /* rdx = ilimit */
-    subq %rdx,     %rax  /* rax = ip0 - ilimit */
-    movq %rax,    %r15   /* r15 = ip0 - ilimit */
-
-    /* rdx = rax / 7 */
-    movabsq $2635249153387078803, %rdx
-    mulq %rdx
-    subq %rdx, %r15
-    shrq %r15
-    addq %r15, %rdx
-    shrq $2, %rdx
-
-    /* r15 = (ip0 - ilimit) / 7 */
-    movq %rdx, %r15
-
-    movabsq $-3689348814741910323, %rdx
-    movq 8(%rsp), %rax /* rax = oend0 */
-    subq %op0,    %rax /* rax = oend0 - op0 */
-    mulq %rdx
-    shrq $3,      %rdx /* rdx = rax / 10 */
-
-    /* r15 = min(%rdx, %r15) */
-    cmpq  %rdx, %r15
-    cmova %rdx, %r15
-
-    movabsq $-3689348814741910323, %rdx
-    movq 16(%rsp), %rax /* rax = oend1 */
-    subq %op1,     %rax /* rax = oend1 - op1 */
-    mulq %rdx
-    shrq $3,       %rdx /* rdx = rax / 10 */
-
-    /* r15 = min(%rdx, %r15) */
-    cmpq  %rdx, %r15
-    cmova %rdx, %r15
-
-    movabsq $-3689348814741910323, %rdx
-    movq 24(%rsp), %rax /* rax = oend2 */
-    subq %op2,     %rax /* rax = oend2 - op2 */
-    mulq %rdx
-    shrq $3,       %rdx /* rdx = rax / 10 */
-
-    /* r15 = min(%rdx, %r15) */
-    cmpq  %rdx, %r15
-    cmova %rdx, %r15
-
-    movabsq $-3689348814741910323, %rdx
-    movq 32(%rsp), %rax /* rax = oend3 */
-    subq %op3,     %rax /* rax = oend3 - op3 */
-    mulq %rdx
-    shrq $3,       %rdx /* rdx = rax / 10 */
-
-    /* r15 = min(%rdx, %r15) */
-    cmpq  %rdx, %r15
-    cmova %rdx, %r15
-
-    /* olimit = op3 + 5 * r15 */
-    movq %r15, %rax
-    leaq (%op3, %rax, 4), %olimit
-    addq %rax, %olimit
-
-    movq 0(%rsp), %rdx
-
-    /* If (op3 + 10 > olimit) */
-    movq %op3, %rax    /* rax = op3 */
-    addq $10,  %rax    /* rax = op3 + 10 */
-    cmpq %rax, %olimit /* op3 + 10 > olimit */
-    jb .L_4X2_exit
-
-    /* If (ip1 < ip0) go to exit */
-    cmpq %ip0, %ip1
-    jb .L_4X2_exit
-
-    /* If (ip2 < ip1) go to exit */
-    cmpq %ip1, %ip2
-    jb .L_4X2_exit
-
-    /* If (ip3 < ip2) go to exit */
-    cmpq %ip2, %ip3
-    jb .L_4X2_exit
-
-#define DECODE(n, idx)              \
-    movq %bits##n, %rax;            \
-    shrq $53, %rax;                 \
-    movzwl 0(%dtable,%rax,4),%r8d;  \
-    movzbl 2(%dtable,%rax,4),%r15d; \
-    movzbl 3(%dtable,%rax,4),%eax;  \
-    movw %r8w, (%op##n);            \
-    shlxq %r15, %bits##n, %bits##n; \
-    addq %rax, %op##n
-
-#define RELOAD_BITS(n)              \
-    bsfq %bits##n, %bits##n;        \
-    movq %bits##n, %rax;            \
-    shrq $3, %bits##n;              \
-    andq $7, %rax;                  \
-    subq %bits##n, %ip##n;          \
-    movq (%ip##n), %bits##n;        \
-    orq $1, %bits##n;               \
-    shlxq %rax, %bits##n, %bits##n
-
-
-    movq %olimit, 48(%rsp)
-
-    .p2align 6
-
-.L_4X2_loop_body:
-    /* We clobber r8, so store it on the stack */
-    movq %r8, 0(%rsp)
-
-    /* Decode 5 symbols from each of the 4 streams (20 symbols total). */
-    FOR_EACH_STREAM_WITH_INDEX(DECODE, 0)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE, 1)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE, 2)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE, 3)
-    FOR_EACH_STREAM_WITH_INDEX(DECODE, 4)
-
-    /* Reload r8 */
-    movq 0(%rsp), %r8
-
-    FOR_EACH_STREAM(RELOAD_BITS)
-
-    cmp %op3, 48(%rsp)
-    ja .L_4X2_loop_body
-    jmp .L_4X2_compute_olimit
-
-#undef DECODE
-#undef RELOAD_BITS
-.L_4X2_exit:
-    addq $8, %rsp
-    /* Restore stack (oend & olimit) */
-    pop %rax /* oend0 */
-    pop %rax /* oend1 */
-    pop %rax /* oend2 */
-    pop %rax /* oend3 */
-    pop %rax /* ilimit */
-    pop %rax /* olimit */
-    pop %rax /* arg */
-
-    /* Save ip / op / bits */
-    movq %ip0,  0(%rax)
-    movq %ip1,  8(%rax)
-    movq %ip2, 16(%rax)
-    movq %ip3, 24(%rax)
-    movq %op0, 32(%rax)
-    movq %op1, 40(%rax)
-    movq %op2, 48(%rax)
-    movq %op3, 56(%rax)
-    movq %bits0, 64(%rax)
-    movq %bits1, 72(%rax)
-    movq %bits2, 80(%rax)
-    movq %bits3, 88(%rax)
-
-    /* Restore registers */
-    pop %r15
-    pop %r14
-    pop %r13
-    pop %r12
-    pop %r11
-    pop %r10
-    pop %r9
-    pop %r8
-    pop %rdi
-    pop %rsi
-    pop %rbp
-    pop %rdx
-    pop %rcx
-    pop %rbx
-    pop %rax
-    ret
-
-#endif
diff --git a/dep/zstd/lib/decompress/zstd_ddict.c b/dep/zstd/lib/decompress/zstd_ddict.c
deleted file mode 100644
index ce335477b..000000000
--- a/dep/zstd/lib/decompress/zstd_ddict.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* zstd_ddict.c :
- * concentrates all logic that needs to know the internals of ZSTD_DDict object */
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-#include "../common/cpu.h"         /* bmi2 */
-#include "../common/mem.h"         /* low level memory routines */
-#define FSE_STATIC_LINKING_ONLY
-#include "../common/fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "zstd_decompress_internal.h"
-#include "zstd_ddict.h"
-
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-#  include "../legacy/zstd_legacy.h"
-#endif
-
-
-
-/*-*******************************************************
-*  Types
-*********************************************************/
-struct ZSTD_DDict_s {
-    void* dictBuffer;
-    const void* dictContent;
-    size_t dictSize;
-    ZSTD_entropyDTables_t entropy;
-    U32 dictID;
-    U32 entropyPresent;
-    ZSTD_customMem cMem;
-};  /* typedef'd to ZSTD_DDict within "zstd.h" */
-
-const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
-{
-    assert(ddict != NULL);
-    return ddict->dictContent;
-}
-
-size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
-{
-    assert(ddict != NULL);
-    return ddict->dictSize;
-}
-
-void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
-{
-    DEBUGLOG(4, "ZSTD_copyDDictParameters");
-    assert(dctx != NULL);
-    assert(ddict != NULL);
-    dctx->dictID = ddict->dictID;
-    dctx->prefixStart = ddict->dictContent;
-    dctx->virtualStart = ddict->dictContent;
-    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
-    dctx->previousDstEnd = dctx->dictEnd;
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
-    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
-#endif
-    if (ddict->entropyPresent) {
-        dctx->litEntropy = 1;
-        dctx->fseEntropy = 1;
-        dctx->LLTptr = ddict->entropy.LLTable;
-        dctx->MLTptr = ddict->entropy.MLTable;
-        dctx->OFTptr = ddict->entropy.OFTable;
-        dctx->HUFptr = ddict->entropy.hufTable;
-        dctx->entropy.rep[0] = ddict->entropy.rep[0];
-        dctx->entropy.rep[1] = ddict->entropy.rep[1];
-        dctx->entropy.rep[2] = ddict->entropy.rep[2];
-    } else {
-        dctx->litEntropy = 0;
-        dctx->fseEntropy = 0;
-    }
-}
-
-
-static size_t
-ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
-                           ZSTD_dictContentType_e dictContentType)
-{
-    ddict->dictID = 0;
-    ddict->entropyPresent = 0;
-    if (dictContentType == ZSTD_dct_rawContent) return 0;
-
-    if (ddict->dictSize < 8) {
-        if (dictContentType == ZSTD_dct_fullDict)
-            return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
-        return 0;   /* pure content mode */
-    }
-    {   U32 const magic = MEM_readLE32(ddict->dictContent);
-        if (magic != ZSTD_MAGIC_DICTIONARY) {
-            if (dictContentType == ZSTD_dct_fullDict)
-                return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
-            return 0;   /* pure content mode */
-        }
-    }
-    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
-
-    /* load entropy tables */
-    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
-            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
-        dictionary_corrupted, "");
-    ddict->entropyPresent = 1;
-    return 0;
-}
-
-
-static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
-                                      const void* dict, size_t dictSize,
-                                      ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_dictContentType_e dictContentType)
-{
-    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
-        ddict->dictBuffer = NULL;
-        ddict->dictContent = dict;
-        if (!dict) dictSize = 0;
-    } else {
-        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
-        ddict->dictBuffer = internalBuffer;
-        ddict->dictContent = internalBuffer;
-        if (!internalBuffer) return ERROR(memory_allocation);
-        ZSTD_memcpy(internalBuffer, dict, dictSize);
-    }
-    ddict->dictSize = dictSize;
-    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
-
-    /* parse dictionary content */
-    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
-
-    return 0;
-}
-
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
-                                      ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_dictContentType_e dictContentType,
-                                      ZSTD_customMem customMem)
-{
-    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
-
-    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
-        if (ddict == NULL) return NULL;
-        ddict->cMem = customMem;
-        {   size_t const initResult = ZSTD_initDDict_internal(ddict,
-                                            dict, dictSize,
-                                            dictLoadMethod, dictContentType);
-            if (ZSTD_isError(initResult)) {
-                ZSTD_freeDDict(ddict);
-                return NULL;
-        }   }
-        return ddict;
-    }
-}
-
-/*! ZSTD_createDDict() :
-*   Create a digested dictionary, to start decompression without startup delay.
-*   `dict` content is copied inside DDict.
-*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
-ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
-{
-    ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
-}
-
-/*! ZSTD_createDDict_byReference() :
- *  Create a digested dictionary, to start decompression without startup delay.
- *  Dictionary content is simply referenced, it will be accessed during decompression.
- *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
-ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
-{
-    ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
-}
-
-
-const ZSTD_DDict* ZSTD_initStaticDDict(
-                                void* sBuffer, size_t sBufferSize,
-                                const void* dict, size_t dictSize,
-                                ZSTD_dictLoadMethod_e dictLoadMethod,
-                                ZSTD_dictContentType_e dictContentType)
-{
-    size_t const neededSpace = sizeof(ZSTD_DDict)
-                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
-    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
-    assert(sBuffer != NULL);
-    assert(dict != NULL);
-    if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
-    if (sBufferSize < neededSpace) return NULL;
-    if (dictLoadMethod == ZSTD_dlm_byCopy) {
-        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
-        dict = ddict+1;
-    }
-    if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
-                                              dict, dictSize,
-                                              ZSTD_dlm_byRef, dictContentType) ))
-        return NULL;
-    return ddict;
-}
-
-
-size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
-{
-    if (ddict==NULL) return 0;   /* support free on NULL */
-    {   ZSTD_customMem const cMem = ddict->cMem;
-        ZSTD_customFree(ddict->dictBuffer, cMem);
-        ZSTD_customFree(ddict, cMem);
-        return 0;
-    }
-}
-
-/*! ZSTD_estimateDDictSize() :
- *  Estimate amount of memory that will be needed to create a dictionary for decompression.
- *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
-size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
-{
-    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
-}
-
-size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
-{
-    if (ddict==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
-}
-
-/*! ZSTD_getDictID_fromDDict() :
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
-{
-    if (ddict==NULL) return 0;
-    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-}
diff --git a/dep/zstd/lib/decompress/zstd_ddict.h b/dep/zstd/lib/decompress/zstd_ddict.h
deleted file mode 100644
index bd03268b5..000000000
--- a/dep/zstd/lib/decompress/zstd_ddict.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-#ifndef ZSTD_DDICT_H
-#define ZSTD_DDICT_H
-
-/*-*******************************************************
- *  Dependencies
- *********************************************************/
-#include "../common/zstd_deps.h"   /* size_t */
-#include "../zstd.h"     /* ZSTD_DDict, and several public functions */
-
-
-/*-*******************************************************
- *  Interface
- *********************************************************/
-
-/* note: several prototypes are already published in `zstd.h` :
- * ZSTD_createDDict()
- * ZSTD_createDDict_byReference()
- * ZSTD_createDDict_advanced()
- * ZSTD_freeDDict()
- * ZSTD_initStaticDDict()
- * ZSTD_sizeof_DDict()
- * ZSTD_estimateDDictSize()
- * ZSTD_getDictID_fromDict()
- */
-
-const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
-size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
-
-void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
-
-
-
-#endif /* ZSTD_DDICT_H */
diff --git a/dep/zstd/lib/decompress/zstd_decompress.c b/dep/zstd/lib/decompress/zstd_decompress.c
deleted file mode 100644
index 0031e98cf..000000000
--- a/dep/zstd/lib/decompress/zstd_decompress.c
+++ /dev/null
@@ -1,2230 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/* ***************************************************************
-*  Tuning parameters
-*****************************************************************/
-/*!
- * HEAPMODE :
- * Select how default decompression function ZSTD_decompress() allocates its context,
- * on stack (0), or into heap (1, default; requires malloc()).
- * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
- */
-#ifndef ZSTD_HEAPMODE
-#  define ZSTD_HEAPMODE 1
-#endif
-
-/*!
-*  LEGACY_SUPPORT :
-*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
-*/
-#ifndef ZSTD_LEGACY_SUPPORT
-#  define ZSTD_LEGACY_SUPPORT 0
-#endif
-
-/*!
- *  MAXWINDOWSIZE_DEFAULT :
- *  maximum window size accepted by DStream __by default__.
- *  Frames requiring more memory will be rejected.
- *  It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
- */
-#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
-#endif
-
-/*!
- *  NO_FORWARD_PROGRESS_MAX :
- *  maximum allowed nb of calls to ZSTD_decompressStream()
- *  without any forward progress
- *  (defined as: no byte read from input, and no byte flushed to output)
- *  before triggering an error.
- */
-#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
-#  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
-#endif
-
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-#include "../common/mem.h"         /* low level memory routines */
-#define FSE_STATIC_LINKING_ONLY
-#include "../common/fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
-#include "../common/zstd_internal.h"  /* blockProperties_t */
-#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
-#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
-#include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
-
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-#  include "../legacy/zstd_legacy.h"
-#endif
-
-
-
-/*************************************
- * Multiple DDicts Hashset internals *
- *************************************/
-
-#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
-#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
-                                                     * Currently, that means a 0.75 load factor.
-                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
-                                                     * the load factor of the ddict hash set.
-                                                     */
-
-#define DDICT_HASHSET_TABLE_BASE_SIZE 64
-#define DDICT_HASHSET_RESIZE_FACTOR 2
-
-/* Hash function to determine starting position of dict insertion within the table
- * Returns an index between [0, hashSet->ddictPtrTableSize]
- */
-static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
-    const U64 hash = XXH64(&dictID, sizeof(U32), 0);
-    /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
-    return hash & (hashSet->ddictPtrTableSize - 1);
-}
-
-/* Adds DDict to a hashset without resizing it.
- * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
- * Returns 0 if successful, or a zstd error code if something went wrong.
- */
-static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
-    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
-    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
-    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
-    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
-    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
-    while (hashSet->ddictPtrTable[idx] != NULL) {
-        /* Replace existing ddict if inserting ddict with same dictID */
-        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
-            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
-            hashSet->ddictPtrTable[idx] = ddict;
-            return 0;
-        }
-        idx &= idxRangeMask;
-        idx++;
-    }
-    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
-    hashSet->ddictPtrTable[idx] = ddict;
-    hashSet->ddictPtrCount++;
-    return 0;
-}
-
-/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
- * rehashes all values, allocates new table, frees old table.
- * Returns 0 on success, otherwise a zstd error code.
- */
-static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
-    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
-    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
-    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
-    size_t oldTableSize = hashSet->ddictPtrTableSize;
-    size_t i;
-
-    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
-    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
-    hashSet->ddictPtrTable = newTable;
-    hashSet->ddictPtrTableSize = newTableSize;
-    hashSet->ddictPtrCount = 0;
-    for (i = 0; i < oldTableSize; ++i) {
-        if (oldTable[i] != NULL) {
-            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
-        }
-    }
-    ZSTD_customFree((void*)oldTable, customMem);
-    DEBUGLOG(4, "Finished re-hash");
-    return 0;
-}
-
-/* Fetches a DDict with the given dictID
- * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
- */
-static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
-    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
-    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
-    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
-    for (;;) {
-        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
-        if (currDictID == dictID || currDictID == 0) {
-            /* currDictID == 0 implies a NULL ddict entry */
-            break;
-        } else {
-            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
-            idx++;
-        }
-    }
-    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
-    return hashSet->ddictPtrTable[idx];
-}
-
-/* Allocates space for and returns a ddict hash set
- * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
- * Returns NULL if allocation failed.
- */
-static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
-    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
-    DEBUGLOG(4, "Allocating new hash set");
-    if (!ret)
-        return NULL;
-    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
-    if (!ret->ddictPtrTable) {
-        ZSTD_customFree(ret, customMem);
-        return NULL;
-    }
-    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
-    ret->ddictPtrCount = 0;
-    return ret;
-}
-
-/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
- * Note: The ZSTD_DDict* within the table are NOT freed.
- */
-static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
-    DEBUGLOG(4, "Freeing ddict hash set");
-    if (hashSet && hashSet->ddictPtrTable) {
-        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
-    }
-    if (hashSet) {
-        ZSTD_customFree(hashSet, customMem);
-    }
-}
-
-/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
- * Returns 0 on success, or a ZSTD error.
- */
-static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
-    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
-    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
-        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
-    }
-    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
-    return 0;
-}
-
-/*-*************************************************************
-*   Context management
-***************************************************************/
-size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
-{
-    if (dctx==NULL) return 0;   /* support sizeof NULL */
-    return sizeof(*dctx)
-           + ZSTD_sizeof_DDict(dctx->ddictLocal)
-           + dctx->inBuffSize + dctx->outBuffSize;
-}
-
-size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
-
-
-static size_t ZSTD_startingInputLength(ZSTD_format_e format)
-{
-    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
-    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
-    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
-    return startingInputLength;
-}
-
-static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
-{
-    assert(dctx->streamStage == zdss_init);
-    dctx->format = ZSTD_f_zstd1;
-    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-    dctx->outBufferMode = ZSTD_bm_buffered;
-    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
-    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
-}
-
-static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
-{
-    dctx->staticSize  = 0;
-    dctx->ddict       = NULL;
-    dctx->ddictLocal  = NULL;
-    dctx->dictEnd     = NULL;
-    dctx->ddictIsCold = 0;
-    dctx->dictUses = ZSTD_dont_use;
-    dctx->inBuff      = NULL;
-    dctx->inBuffSize  = 0;
-    dctx->outBuffSize = 0;
-    dctx->streamStage = zdss_init;
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-    dctx->legacyContext = NULL;
-    dctx->previousLegacyVersion = 0;
-#endif
-    dctx->noForwardProgress = 0;
-    dctx->oversizedDuration = 0;
-#if DYNAMIC_BMI2
-    dctx->bmi2 = ZSTD_cpuSupportsBmi2();
-#endif
-    dctx->ddictSet = NULL;
-    ZSTD_DCtx_resetParameters(dctx);
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    dctx->dictContentEndForFuzzing = NULL;
-#endif
-}
-
-ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
-{
-    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
-
-    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
-    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
-
-    ZSTD_initDCtx_internal(dctx);
-    dctx->staticSize = workspaceSize;
-    dctx->inBuff = (char*)(dctx+1);
-    return dctx;
-}
-
-static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
-    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
-
-    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
-        if (!dctx) return NULL;
-        dctx->customMem = customMem;
-        ZSTD_initDCtx_internal(dctx);
-        return dctx;
-    }
-}
-
-ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
-    return ZSTD_createDCtx_internal(customMem);
-}
-
-ZSTD_DCtx* ZSTD_createDCtx(void)
-{
-    DEBUGLOG(3, "ZSTD_createDCtx");
-    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
-}
-
-static void ZSTD_clearDict(ZSTD_DCtx* dctx)
-{
-    ZSTD_freeDDict(dctx->ddictLocal);
-    dctx->ddictLocal = NULL;
-    dctx->ddict = NULL;
-    dctx->dictUses = ZSTD_dont_use;
-}
-
-size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
-{
-    if (dctx==NULL) return 0;   /* support free on NULL */
-    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
-    {   ZSTD_customMem const cMem = dctx->customMem;
-        ZSTD_clearDict(dctx);
-        ZSTD_customFree(dctx->inBuff, cMem);
-        dctx->inBuff = NULL;
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-        if (dctx->legacyContext)
-            ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
-#endif
-        if (dctx->ddictSet) {
-            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
-            dctx->ddictSet = NULL;
-        }
-        ZSTD_customFree(dctx, cMem);
-        return 0;
-    }
-}
-
-/* no longer useful */
-void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
-{
-    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
-    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
-}
-
-/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
- * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
- * accordingly sets the ddict to be used to decompress the frame.
- *
- * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
- *
- * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
- */
-static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
-    assert(dctx->refMultipleDDicts && dctx->ddictSet);
-    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
-    if (dctx->ddict) {
-        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
-        if (frameDDict) {
-            DEBUGLOG(4, "DDict found!");
-            ZSTD_clearDict(dctx);
-            dctx->dictID = dctx->fParams.dictID;
-            dctx->ddict = frameDDict;
-            dctx->dictUses = ZSTD_use_indefinitely;
-        }
-    }
-}
-
-
-/*-*************************************************************
- *   Frame header decoding
- ***************************************************************/
-
-/*! ZSTD_isFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- *  Note 3 : Skippable Frame Identifiers are considered valid. */
-unsigned ZSTD_isFrame(const void* buffer, size_t size)
-{
-    if (size < ZSTD_FRAMEIDSIZE) return 0;
-    {   U32 const magic = MEM_readLE32(buffer);
-        if (magic == ZSTD_MAGICNUMBER) return 1;
-        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
-    }
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(buffer, size)) return 1;
-#endif
-    return 0;
-}
-
-/*! ZSTD_isSkippableFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- */
-unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
-{
-    if (size < ZSTD_FRAMEIDSIZE) return 0;
-    {   U32 const magic = MEM_readLE32(buffer);
-        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
-    }
-    return 0;
-}
-
-/** ZSTD_frameHeaderSize_internal() :
- *  srcSize must be large enough to reach header size fields.
- *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
- * @return : size of the Frame Header
- *           or an error code, which can be tested with ZSTD_isError() */
-static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
-{
-    size_t const minInputSize = ZSTD_startingInputLength(format);
-    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
-
-    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
-        U32 const dictID= fhd & 3;
-        U32 const singleSegment = (fhd >> 5) & 1;
-        U32 const fcsId = fhd >> 6;
-        return minInputSize + !singleSegment
-             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
-             + (singleSegment && !fcsId);
-    }
-}
-
-/** ZSTD_frameHeaderSize() :
- *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
- * @return : size of the Frame Header,
- *           or an error code (if srcSize is too small) */
-size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
-{
-    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
-}
-
-
-/** ZSTD_getFrameHeader_advanced() :
- *  decode Frame Header, or require larger `srcSize`.
- *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
- * @return : 0, `zfhPtr` is correctly filled,
- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
- *           or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
-{
-    const BYTE* ip = (const BYTE*)src;
-    size_t const minInputSize = ZSTD_startingInputLength(format);
-
-    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
-    if (srcSize < minInputSize) return minInputSize;
-    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
-
-    if ( (format != ZSTD_f_zstd1_magicless)
-      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
-        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-            /* skippable frame */
-            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
-                return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
-            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
-            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
-            zfhPtr->frameType = ZSTD_skippableFrame;
-            return 0;
-        }
-        RETURN_ERROR(prefix_unknown, "");
-    }
-
-    /* ensure there is enough `srcSize` to fully read/decode frame header */
-    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
-        if (srcSize < fhsize) return fhsize;
-        zfhPtr->headerSize = (U32)fhsize;
-    }
-
-    {   BYTE const fhdByte = ip[minInputSize-1];
-        size_t pos = minInputSize;
-        U32 const dictIDSizeCode = fhdByte&3;
-        U32 const checksumFlag = (fhdByte>>2)&1;
-        U32 const singleSegment = (fhdByte>>5)&1;
-        U32 const fcsID = fhdByte>>6;
-        U64 windowSize = 0;
-        U32 dictID = 0;
-        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
-        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
-                        "reserved bits, must be zero");
-
-        if (!singleSegment) {
-            BYTE const wlByte = ip[pos++];
-            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
-            windowSize = (1ULL << windowLog);
-            windowSize += (windowSize >> 3) * (wlByte&7);
-        }
-        switch(dictIDSizeCode)
-        {
-            default:
-                assert(0);  /* impossible */
-                ZSTD_FALLTHROUGH;
-            case 0 : break;
-            case 1 : dictID = ip[pos]; pos++; break;
-            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
-            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
-        }
-        switch(fcsID)
-        {
-            default:
-                assert(0);  /* impossible */
-                ZSTD_FALLTHROUGH;
-            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
-            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
-            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
-            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
-        }
-        if (singleSegment) windowSize = frameContentSize;
-
-        zfhPtr->frameType = ZSTD_frame;
-        zfhPtr->frameContentSize = frameContentSize;
-        zfhPtr->windowSize = windowSize;
-        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-        zfhPtr->dictID = dictID;
-        zfhPtr->checksumFlag = checksumFlag;
-    }
-    return 0;
-}
-
-/** ZSTD_getFrameHeader() :
- *  decode Frame Header, or require larger `srcSize`.
- *  note : this function does not consume input, it only reads it.
- * @return : 0, `zfhPtr` is correctly filled,
- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
- *           or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
-{
-    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
-}
-
-/** ZSTD_getFrameContentSize() :
- *  compatible with legacy mode
- * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
- *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
- *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
-{
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(src, srcSize)) {
-        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
-        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
-    }
-#endif
-    {   ZSTD_frameHeader zfh;
-        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
-            return ZSTD_CONTENTSIZE_ERROR;
-        if (zfh.frameType == ZSTD_skippableFrame) {
-            return 0;
-        } else {
-            return zfh.frameContentSize;
-    }   }
-}
-
-static size_t readSkippableFrameSize(void const* src, size_t srcSize)
-{
-    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
-    U32 sizeU32;
-
-    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
-
-    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
-    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
-                    frameParameter_unsupported, "");
-    {
-        size_t const skippableSize = skippableHeaderSize + sizeU32;
-        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
-        return skippableSize;
-    }
-}
-
-/*! ZSTD_readSkippableFrame() :
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
- *
- * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
- * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
- * in the magicVariant.
- *
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
- *
- * @return : number of bytes written or a ZSTD error.
- */
-ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
-                                            const void* src, size_t srcSize)
-{
-    U32 const magicNumber = MEM_readLE32(src);
-    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
-    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
-
-    /* check input validity */
-    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
-    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
-    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
-
-    /* deliver payload */
-    if (skippableContentSize > 0  && dst != NULL)
-        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
-    if (magicVariant != NULL)
-        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
-    return skippableContentSize;
-}
-
-/** ZSTD_findDecompressedSize() :
- *  compatible with legacy mode
- *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
- *      skippable frames
- *  @return : decompressed size of the frames contained */
-unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
-{
-    unsigned long long totalDstSize = 0;
-
-    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
-        U32 const magicNumber = MEM_readLE32(src);
-
-        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-            if (ZSTD_isError(skippableSize)) {
-                return ZSTD_CONTENTSIZE_ERROR;
-            }
-            assert(skippableSize <= srcSize);
-
-            src = (const BYTE *)src + skippableSize;
-            srcSize -= skippableSize;
-            continue;
-        }
-
-        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
-
-            /* check for overflow */
-            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
-            totalDstSize += ret;
-        }
-        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
-            if (ZSTD_isError(frameSrcSize)) {
-                return ZSTD_CONTENTSIZE_ERROR;
-            }
-
-            src = (const BYTE *)src + frameSrcSize;
-            srcSize -= frameSrcSize;
-        }
-    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
-
-    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
-
-    return totalDstSize;
-}
-
-/** ZSTD_getDecompressedSize() :
- *  compatible with legacy mode
- * @return : decompressed size if known, 0 otherwise
-             note : 0 can mean any of the following :
-                   - frame content is empty
-                   - decompressed size field is not present in frame header
-                   - frame header unknown / not supported
-                   - frame header not complete (`srcSize` too small) */
-unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
-{
-    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
-    return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
-}
-
-
-/** ZSTD_decodeFrameHeader() :
- * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
- * If multiple DDict references are enabled, also will choose the correct DDict to use.
- * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
-{
-    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
-    if (ZSTD_isError(result)) return result;    /* invalid header */
-    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
-
-    /* Reference DDict requested by frame if dctx references multiple ddicts */
-    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
-        ZSTD_DCtx_selectFrameDDict(dctx);
-    }
-
-#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    /* Skip the dictID check in fuzzing mode, because it makes the search
-     * harder.
-     */
-    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
-                    dictionary_wrong, "");
-#endif
-    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
-    if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0);
-    dctx->processedCSize += headerSize;
-    return 0;
-}
-
-static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
-{
-    ZSTD_frameSizeInfo frameSizeInfo;
-    frameSizeInfo.compressedSize = ret;
-    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
-    return frameSizeInfo;
-}
-
-static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
-{
-    ZSTD_frameSizeInfo frameSizeInfo;
-    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
-
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(src, srcSize))
-        return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
-#endif
-
-    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
-        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
-        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
-               frameSizeInfo.compressedSize <= srcSize);
-        return frameSizeInfo;
-    } else {
-        const BYTE* ip = (const BYTE*)src;
-        const BYTE* const ipstart = ip;
-        size_t remainingSize = srcSize;
-        size_t nbBlocks = 0;
-        ZSTD_frameHeader zfh;
-
-        /* Extract Frame Header */
-        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
-            if (ZSTD_isError(ret))
-                return ZSTD_errorFrameSizeInfo(ret);
-            if (ret > 0)
-                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
-        }
-
-        ip += zfh.headerSize;
-        remainingSize -= zfh.headerSize;
-
-        /* Iterate over each block */
-        while (1) {
-            blockProperties_t blockProperties;
-            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-            if (ZSTD_isError(cBlockSize))
-                return ZSTD_errorFrameSizeInfo(cBlockSize);
-
-            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
-                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
-
-            ip += ZSTD_blockHeaderSize + cBlockSize;
-            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
-            nbBlocks++;
-
-            if (blockProperties.lastBlock) break;
-        }
-
-        /* Final frame content checksum */
-        if (zfh.checksumFlag) {
-            if (remainingSize < 4)
-                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
-            ip += 4;
-        }
-
-        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
-        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
-                                        ? zfh.frameContentSize
-                                        : nbBlocks * zfh.blockSizeMax;
-        return frameSizeInfo;
-    }
-}
-
-/** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
-{
-    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
-    return frameSizeInfo.compressedSize;
-}
-
-/** ZSTD_decompressBound() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame or a skippeable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the maximum decompressed size of the compressed source
- */
-unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
-{
-    unsigned long long bound = 0;
-    /* Iterate over each frame */
-    while (srcSize > 0) {
-        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
-        size_t const compressedSize = frameSizeInfo.compressedSize;
-        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
-        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
-            return ZSTD_CONTENTSIZE_ERROR;
-        assert(srcSize >= compressedSize);
-        src = (const BYTE*)src + compressedSize;
-        srcSize -= compressedSize;
-        bound += decompressedBound;
-    }
-    return bound;
-}
-
-
-/*-*************************************************************
- *   Frame decoding
- ***************************************************************/
-
-/** ZSTD_insertBlock() :
- *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
-size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
-{
-    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
-    ZSTD_checkContinuity(dctx, blockStart, blockSize);
-    dctx->previousDstEnd = (const char*)blockStart + blockSize;
-    return blockSize;
-}
-
-
-static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
-                          const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_copyRawBlock");
-    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
-    if (dst == NULL) {
-        if (srcSize == 0) return 0;
-        RETURN_ERROR(dstBuffer_null, "");
-    }
-    ZSTD_memcpy(dst, src, srcSize);
-    return srcSize;
-}
-
-static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
-                               BYTE b,
-                               size_t regenSize)
-{
-    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
-    if (dst == NULL) {
-        if (regenSize == 0) return 0;
-        RETURN_ERROR(dstBuffer_null, "");
-    }
-    ZSTD_memset(dst, b, regenSize);
-    return regenSize;
-}
-
-static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
-{
-#if ZSTD_TRACE
-    if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
-        ZSTD_Trace trace;
-        ZSTD_memset(&trace, 0, sizeof(trace));
-        trace.version = ZSTD_VERSION_NUMBER;
-        trace.streaming = streaming;
-        if (dctx->ddict) {
-            trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict);
-            trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict);
-            trace.dictionaryIsCold = dctx->ddictIsCold;
-        }
-        trace.uncompressedSize = (size_t)uncompressedSize;
-        trace.compressedSize = (size_t)compressedSize;
-        trace.dctx = dctx;
-        ZSTD_trace_decompress_end(dctx->traceCtx, &trace);
-    }
-#else
-    (void)dctx;
-    (void)uncompressedSize;
-    (void)compressedSize;
-    (void)streaming;
-#endif
-}
-
-
-/*! ZSTD_decompressFrame() :
- * @dctx must be properly initialized
- *  will update *srcPtr and *srcSizePtr,
- *  to make *srcPtr progress by one frame. */
-static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
-                                   void* dst, size_t dstCapacity,
-                             const void** srcPtr, size_t *srcSizePtr)
-{
-    const BYTE* const istart = (const BYTE*)(*srcPtr);
-    const BYTE* ip = istart;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
-    BYTE* op = ostart;
-    size_t remainingSrcSize = *srcSizePtr;
-
-    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
-
-    /* check */
-    RETURN_ERROR_IF(
-        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
-        srcSize_wrong, "");
-
-    /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
-                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
-        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
-        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
-                        srcSize_wrong, "");
-        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
-        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
-    }
-
-    /* Loop on each block */
-    while (1) {
-        size_t decodedSize;
-        blockProperties_t blockProperties;
-        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
-        if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
-        ip += ZSTD_blockHeaderSize;
-        remainingSrcSize -= ZSTD_blockHeaderSize;
-        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
-
-        switch(blockProperties.blockType)
-        {
-        case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
-            break;
-        case bt_raw :
-            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
-            break;
-        case bt_rle :
-            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
-            break;
-        case bt_reserved :
-        default:
-            RETURN_ERROR(corruption_detected, "invalid block type");
-        }
-
-        if (ZSTD_isError(decodedSize)) return decodedSize;
-        if (dctx->validateChecksum)
-            XXH64_update(&dctx->xxhState, op, decodedSize);
-        if (decodedSize != 0)
-            op += decodedSize;
-        assert(ip != NULL);
-        ip += cBlockSize;
-        remainingSrcSize -= cBlockSize;
-        if (blockProperties.lastBlock) break;
-    }
-
-    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
-                        corruption_detected, "");
-    }
-    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
-        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
-        if (!dctx->forceIgnoreChecksum) {
-            U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
-            U32 checkRead;
-            checkRead = MEM_readLE32(ip);
-            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
-        }
-        ip += 4;
-        remainingSrcSize -= 4;
-    }
-    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
-    /* Allow caller to get size read */
-    *srcPtr = ip;
-    *srcSizePtr = remainingSrcSize;
-    return (size_t)(op-ostart);
-}
-
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
-                                        void* dst, size_t dstCapacity,
-                                  const void* src, size_t srcSize,
-                                  const void* dict, size_t dictSize,
-                                  const ZSTD_DDict* ddict)
-{
-    void* const dststart = dst;
-    int moreThan1Frame = 0;
-
-    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
-    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
-
-    if (ddict) {
-        dict = ZSTD_DDict_dictContent(ddict);
-        dictSize = ZSTD_DDict_dictSize(ddict);
-    }
-
-    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
-
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-        if (ZSTD_isLegacy(src, srcSize)) {
-            size_t decodedSize;
-            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
-            if (ZSTD_isError(frameSize)) return frameSize;
-            RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
-                "legacy support is not compatible with static dctx");
-
-            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
-            if (ZSTD_isError(decodedSize)) return decodedSize;
-
-            assert(decodedSize <= dstCapacity);
-            dst = (BYTE*)dst + decodedSize;
-            dstCapacity -= decodedSize;
-
-            src = (const BYTE*)src + frameSize;
-            srcSize -= frameSize;
-
-            continue;
-        }
-#endif
-
-        {   U32 const magicNumber = MEM_readLE32(src);
-            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
-                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
-            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
-                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
-                assert(skippableSize <= srcSize);
-
-                src = (const BYTE *)src + skippableSize;
-                srcSize -= skippableSize;
-                continue;
-        }   }
-
-        if (ddict) {
-            /* we were called from ZSTD_decompress_usingDDict */
-            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
-        } else {
-            /* this will initialize correctly with no dict if dict == NULL, so
-             * use this in all cases but ddict */
-            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
-        }
-        ZSTD_checkContinuity(dctx, dst, dstCapacity);
-
-        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
-                                                    &src, &srcSize);
-            RETURN_ERROR_IF(
-                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
-             && (moreThan1Frame==1),
-                srcSize_wrong,
-                "At least one frame successfully completed, "
-                "but following bytes are garbage: "
-                "it's more likely to be a srcSize error, "
-                "specifying more input bytes than size of frame(s). "
-                "Note: one could be unlucky, it might be a corruption error instead, "
-                "happening right at the place where we expect zstd magic bytes. "
-                "But this is _much_ less likely than a srcSize field error.");
-            if (ZSTD_isError(res)) return res;
-            assert(res <= dstCapacity);
-            if (res != 0)
-                dst = (BYTE*)dst + res;
-            dstCapacity -= res;
-        }
-        moreThan1Frame = 1;
-    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
-
-    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
-
-    return (size_t)((BYTE*)dst - (BYTE*)dststart);
-}
-
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                 void* dst, size_t dstCapacity,
-                           const void* src, size_t srcSize,
-                           const void* dict, size_t dictSize)
-{
-    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
-}
-
-
-static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
-{
-    switch (dctx->dictUses) {
-    default:
-        assert(0 /* Impossible */);
-        ZSTD_FALLTHROUGH;
-    case ZSTD_dont_use:
-        ZSTD_clearDict(dctx);
-        return NULL;
-    case ZSTD_use_indefinitely:
-        return dctx->ddict;
-    case ZSTD_use_once:
-        dctx->dictUses = ZSTD_dont_use;
-        return dctx->ddict;
-    }
-}
-
-size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
-}
-
-
-size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
-    size_t regenSize;
-    ZSTD_DCtx* const dctx =  ZSTD_createDCtx_internal(ZSTD_defaultCMem);
-    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
-    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
-    ZSTD_freeDCtx(dctx);
-    return regenSize;
-#else   /* stack mode */
-    ZSTD_DCtx dctx;
-    ZSTD_initDCtx_internal(&dctx);
-    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
-#endif
-}
-
-
-/*-**************************************
-*   Advanced Streaming Decompression API
-*   Bufferless and synchronous
-****************************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
-
-/**
- * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
- * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
- * be streamed.
- *
- * For blocks that can be streamed, this allows us to reduce the latency until we produce
- * output, and avoid copying the input.
- *
- * @param inputSize - The total amount of input that the caller currently has.
- */
-static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
-    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
-        return dctx->expected;
-    if (dctx->bType != bt_raw)
-        return dctx->expected;
-    return BOUNDED(1, inputSize, dctx->expected);
-}
-
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
-    switch(dctx->stage)
-    {
-    default:   /* should not happen */
-        assert(0);
-        ZSTD_FALLTHROUGH;
-    case ZSTDds_getFrameHeaderSize:
-        ZSTD_FALLTHROUGH;
-    case ZSTDds_decodeFrameHeader:
-        return ZSTDnit_frameHeader;
-    case ZSTDds_decodeBlockHeader:
-        return ZSTDnit_blockHeader;
-    case ZSTDds_decompressBlock:
-        return ZSTDnit_block;
-    case ZSTDds_decompressLastBlock:
-        return ZSTDnit_lastBlock;
-    case ZSTDds_checkChecksum:
-        return ZSTDnit_checksum;
-    case ZSTDds_decodeSkippableHeader:
-        ZSTD_FALLTHROUGH;
-    case ZSTDds_skipFrame:
-        return ZSTDnit_skippableFrame;
-    }
-}
-
-static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
-
-/** ZSTD_decompressContinue() :
- *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
- *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
- *            or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
-    /* Sanity check */
-    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
-    ZSTD_checkContinuity(dctx, dst, dstCapacity);
-
-    dctx->processedCSize += srcSize;
-
-    switch (dctx->stage)
-    {
-    case ZSTDds_getFrameHeaderSize :
-        assert(src != NULL);
-        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
-            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
-            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
-                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
-                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
-                dctx->stage = ZSTDds_decodeSkippableHeader;
-                return 0;
-        }   }
-        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
-        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
-        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
-        dctx->expected = dctx->headerSize - srcSize;
-        dctx->stage = ZSTDds_decodeFrameHeader;
-        return 0;
-
-    case ZSTDds_decodeFrameHeader:
-        assert(src != NULL);
-        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
-        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
-        dctx->expected = ZSTD_blockHeaderSize;
-        dctx->stage = ZSTDds_decodeBlockHeader;
-        return 0;
-
-    case ZSTDds_decodeBlockHeader:
-        {   blockProperties_t bp;
-            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
-            if (ZSTD_isError(cBlockSize)) return cBlockSize;
-            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
-            dctx->expected = cBlockSize;
-            dctx->bType = bp.blockType;
-            dctx->rleSize = bp.origSize;
-            if (cBlockSize) {
-                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
-                return 0;
-            }
-            /* empty block */
-            if (bp.lastBlock) {
-                if (dctx->fParams.checksumFlag) {
-                    dctx->expected = 4;
-                    dctx->stage = ZSTDds_checkChecksum;
-                } else {
-                    dctx->expected = 0; /* end of frame */
-                    dctx->stage = ZSTDds_getFrameHeaderSize;
-                }
-            } else {
-                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
-                dctx->stage = ZSTDds_decodeBlockHeader;
-            }
-            return 0;
-        }
-
-    case ZSTDds_decompressLastBlock:
-    case ZSTDds_decompressBlock:
-        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
-        {   size_t rSize;
-            switch(dctx->bType)
-            {
-            case bt_compressed:
-                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
-                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
-                dctx->expected = 0;  /* Streaming not supported */
-                break;
-            case bt_raw :
-                assert(srcSize <= dctx->expected);
-                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
-                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
-                assert(rSize == srcSize);
-                dctx->expected -= rSize;
-                break;
-            case bt_rle :
-                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
-                dctx->expected = 0;  /* Streaming not supported */
-                break;
-            case bt_reserved :   /* should never happen */
-            default:
-                RETURN_ERROR(corruption_detected, "invalid block type");
-            }
-            FORWARD_IF_ERROR(rSize, "");
-            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
-            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
-            dctx->decodedSize += rSize;
-            if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
-            dctx->previousDstEnd = (char*)dst + rSize;
-
-            /* Stay on the same stage until we are finished streaming the block. */
-            if (dctx->expected > 0) {
-                return rSize;
-            }
-
-            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
-                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
-                RETURN_ERROR_IF(
-                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
-                 && dctx->decodedSize != dctx->fParams.frameContentSize,
-                    corruption_detected, "");
-                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
-                    dctx->expected = 4;
-                    dctx->stage = ZSTDds_checkChecksum;
-                } else {
-                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
-                    dctx->expected = 0;   /* ends here */
-                    dctx->stage = ZSTDds_getFrameHeaderSize;
-                }
-            } else {
-                dctx->stage = ZSTDds_decodeBlockHeader;
-                dctx->expected = ZSTD_blockHeaderSize;
-            }
-            return rSize;
-        }
-
-    case ZSTDds_checkChecksum:
-        assert(srcSize == 4);  /* guaranteed by dctx->expected */
-        {
-            if (dctx->validateChecksum) {
-                U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
-                U32 const check32 = MEM_readLE32(src);
-                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
-                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
-            }
-            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
-            dctx->expected = 0;
-            dctx->stage = ZSTDds_getFrameHeaderSize;
-            return 0;
-        }
-
-    case ZSTDds_decodeSkippableHeader:
-        assert(src != NULL);
-        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
-        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
-        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
-        dctx->stage = ZSTDds_skipFrame;
-        return 0;
-
-    case ZSTDds_skipFrame:
-        dctx->expected = 0;
-        dctx->stage = ZSTDds_getFrameHeaderSize;
-        return 0;
-
-    default:
-        assert(0);   /* impossible */
-        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
-    }
-}
-
-
-static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
-    dctx->dictEnd = dctx->previousDstEnd;
-    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
-    dctx->prefixStart = dict;
-    dctx->previousDstEnd = (const char*)dict + dictSize;
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
-    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
-#endif
-    return 0;
-}
-
-/*! ZSTD_loadDEntropy() :
- *  dict : must point at beginning of a valid zstd dictionary.
- * @return : size of entropy tables read */
-size_t
-ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
-                  const void* const dict, size_t const dictSize)
-{
-    const BYTE* dictPtr = (const BYTE*)dict;
-    const BYTE* const dictEnd = dictPtr + dictSize;
-
-    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
-    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
-    dictPtr += 8;   /* skip header = magic + dictID */
-
-    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
-    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
-    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
-    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
-        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
-#ifdef HUF_FORCE_DECOMPRESS_X1
-        /* in minimal huffman, we always use X1 variants */
-        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
-                                                dictPtr, dictEnd - dictPtr,
-                                                workspace, workspaceSize);
-#else
-        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
-                                                dictPtr, (size_t)(dictEnd - dictPtr),
-                                                workspace, workspaceSize);
-#endif
-        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
-        dictPtr += hSize;
-    }
-
-    {   short offcodeNCount[MaxOff+1];
-        unsigned offcodeMaxValue = MaxOff, offcodeLog;
-        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
-        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
-        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
-        ZSTD_buildFSETable( entropy->OFTable,
-                            offcodeNCount, offcodeMaxValue,
-                            OF_base, OF_bits,
-                            offcodeLog,
-                            entropy->workspace, sizeof(entropy->workspace),
-                            /* bmi2 */0);
-        dictPtr += offcodeHeaderSize;
-    }
-
-    {   short matchlengthNCount[MaxML+1];
-        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
-        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
-        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
-        ZSTD_buildFSETable( entropy->MLTable,
-                            matchlengthNCount, matchlengthMaxValue,
-                            ML_base, ML_bits,
-                            matchlengthLog,
-                            entropy->workspace, sizeof(entropy->workspace),
-                            /* bmi2 */ 0);
-        dictPtr += matchlengthHeaderSize;
-    }
-
-    {   short litlengthNCount[MaxLL+1];
-        unsigned litlengthMaxValue = MaxLL, litlengthLog;
-        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
-        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
-        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
-        ZSTD_buildFSETable( entropy->LLTable,
-                            litlengthNCount, litlengthMaxValue,
-                            LL_base, LL_bits,
-                            litlengthLog,
-                            entropy->workspace, sizeof(entropy->workspace),
-                            /* bmi2 */ 0);
-        dictPtr += litlengthHeaderSize;
-    }
-
-    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
-    {   int i;
-        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
-        for (i=0; i<3; i++) {
-            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
-                            dictionary_corrupted, "");
-            entropy->rep[i] = rep;
-    }   }
-
-    return (size_t)(dictPtr - (const BYTE*)dict);
-}
-
-static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
-    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
-    {   U32 const magic = MEM_readLE32(dict);
-        if (magic != ZSTD_MAGIC_DICTIONARY) {
-            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
-    }   }
-    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
-
-    /* load entropy tables */
-    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
-        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
-        dict = (const char*)dict + eSize;
-        dictSize -= eSize;
-    }
-    dctx->litEntropy = dctx->fseEntropy = 1;
-
-    /* reference dictionary content */
-    return ZSTD_refDictContent(dctx, dict, dictSize);
-}
-
-size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
-{
-    assert(dctx != NULL);
-#if ZSTD_TRACE
-    dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
-#endif
-    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
-    dctx->stage = ZSTDds_getFrameHeaderSize;
-    dctx->processedCSize = 0;
-    dctx->decodedSize = 0;
-    dctx->previousDstEnd = NULL;
-    dctx->prefixStart = NULL;
-    dctx->virtualStart = NULL;
-    dctx->dictEnd = NULL;
-    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
-    dctx->litEntropy = dctx->fseEntropy = 0;
-    dctx->dictID = 0;
-    dctx->bType = bt_reserved;
-    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
-    dctx->LLTptr = dctx->entropy.LLTable;
-    dctx->MLTptr = dctx->entropy.MLTable;
-    dctx->OFTptr = dctx->entropy.OFTable;
-    dctx->HUFptr = dctx->entropy.hufTable;
-    return 0;
-}
-
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
-    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
-    if (dict && dictSize)
-        RETURN_ERROR_IF(
-            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
-            dictionary_corrupted, "");
-    return 0;
-}
-
-
-/* ======   ZSTD_DDict   ====== */
-
-size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
-{
-    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
-    assert(dctx != NULL);
-    if (ddict) {
-        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
-        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
-        const void* const dictEnd = dictStart + dictSize;
-        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
-        DEBUGLOG(4, "DDict is %s",
-                    dctx->ddictIsCold ? "~cold~" : "hot!");
-    }
-    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
-    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
-        ZSTD_copyDDictParameters(dctx, ddict);
-    }
-    return 0;
-}
-
-/*! ZSTD_getDictID_fromDict() :
- *  Provides the dictID stored within dictionary.
- *  if @return == 0, the dictionary is not conformant with Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
-{
-    if (dictSize < 8) return 0;
-    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
-    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
-}
-
-/*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompress frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could for one of the following reasons :
- *  - The frame does not require a dictionary (most common case).
- *  - The frame was built with dictID intentionally removed.
- *    Needed dictionary is a hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, frame header could not be decoded.
- *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
- *  - This is not a Zstandard frame.
- *  When identifying the exact failure cause, it's possible to use
- *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
-{
-    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
-    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
-    if (ZSTD_isError(hError)) return 0;
-    return zfp.dictID;
-}
-
-
-/*! ZSTD_decompress_usingDDict() :
-*   Decompression using a pre-digested Dictionary
-*   Use dictionary without significant overhead. */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
-                                  void* dst, size_t dstCapacity,
-                            const void* src, size_t srcSize,
-                            const ZSTD_DDict* ddict)
-{
-    /* pass content and size in case legacy frames are encountered */
-    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
-                                     NULL, 0,
-                                     ddict);
-}
-
-
-/*=====================================
-*   Streaming decompression
-*====================================*/
-
-ZSTD_DStream* ZSTD_createDStream(void)
-{
-    DEBUGLOG(3, "ZSTD_createDStream");
-    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
-}
-
-ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
-{
-    return ZSTD_initStaticDCtx(workspace, workspaceSize);
-}
-
-ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
-{
-    return ZSTD_createDCtx_internal(customMem);
-}
-
-size_t ZSTD_freeDStream(ZSTD_DStream* zds)
-{
-    return ZSTD_freeDCtx(zds);
-}
-
-
-/* ***  Initialization  *** */
-
-size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
-
-size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
-                                   const void* dict, size_t dictSize,
-                                         ZSTD_dictLoadMethod_e dictLoadMethod,
-                                         ZSTD_dictContentType_e dictContentType)
-{
-    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
-    ZSTD_clearDict(dctx);
-    if (dict && dictSize != 0) {
-        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
-        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
-        dctx->ddict = dctx->ddictLocal;
-        dctx->dictUses = ZSTD_use_indefinitely;
-    }
-    return 0;
-}
-
-size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
-    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
-}
-
-size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
-{
-    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
-}
-
-size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
-{
-    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
-    dctx->dictUses = ZSTD_use_once;
-    return 0;
-}
-
-size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
-{
-    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
-}
-
-
-/* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_startingInputLength().
- * this function cannot fail */
-size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
-{
-    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
-    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
-    return ZSTD_startingInputLength(zds->format);
-}
-
-/* note : this variant can't fail */
-size_t ZSTD_initDStream(ZSTD_DStream* zds)
-{
-    DEBUGLOG(4, "ZSTD_initDStream");
-    return ZSTD_initDStream_usingDDict(zds, NULL);
-}
-
-/* ZSTD_initDStream_usingDDict() :
- * ddict will just be referenced, and must outlive decompression session
- * this function cannot fail */
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
-{
-    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
-    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
-    return ZSTD_startingInputLength(dctx->format);
-}
-
-/* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_startingInputLength().
- * this function cannot fail */
-size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
-{
-    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
-    return ZSTD_startingInputLength(dctx->format);
-}
-
-
-size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
-{
-    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
-    ZSTD_clearDict(dctx);
-    if (ddict) {
-        dctx->ddict = ddict;
-        dctx->dictUses = ZSTD_use_indefinitely;
-        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
-            if (dctx->ddictSet == NULL) {
-                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
-                if (!dctx->ddictSet) {
-                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
-                }
-            }
-            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
-            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
-        }
-    }
-    return 0;
-}
-
-/* ZSTD_DCtx_setMaxWindowSize() :
- * note : no direct equivalence in ZSTD_DCtx_setParameter,
- * since this version sets windowSize, and the other sets windowLog */
-size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
-{
-    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
-    size_t const min = (size_t)1 << bounds.lowerBound;
-    size_t const max = (size_t)1 << bounds.upperBound;
-    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
-    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
-    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
-    dctx->maxWindowSize = maxWindowSize;
-    return 0;
-}
-
-size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
-{
-    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
-}
-
-ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
-{
-    ZSTD_bounds bounds = { 0, 0, 0 };
-    switch(dParam) {
-        case ZSTD_d_windowLogMax:
-            bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            bounds.upperBound = ZSTD_WINDOWLOG_MAX;
-            return bounds;
-        case ZSTD_d_format:
-            bounds.lowerBound = (int)ZSTD_f_zstd1;
-            bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
-            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
-            return bounds;
-        case ZSTD_d_stableOutBuffer:
-            bounds.lowerBound = (int)ZSTD_bm_buffered;
-            bounds.upperBound = (int)ZSTD_bm_stable;
-            return bounds;
-        case ZSTD_d_forceIgnoreChecksum:
-            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
-            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
-            return bounds;
-        case ZSTD_d_refMultipleDDicts:
-            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
-            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
-            return bounds;
-        default:;
-    }
-    bounds.error = ERROR(parameter_unsupported);
-    return bounds;
-}
-
-/* ZSTD_dParam_withinBounds:
- * @return 1 if value is within dParam bounds,
- * 0 otherwise */
-static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
-{
-    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
-    if (ZSTD_isError(bounds.error)) return 0;
-    if (value < bounds.lowerBound) return 0;
-    if (value > bounds.upperBound) return 0;
-    return 1;
-}
-
-#define CHECK_DBOUNDS(p,v) {                \
-    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
-}
-
-size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
-{
-    switch (param) {
-        case ZSTD_d_windowLogMax:
-            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
-            return 0;
-        case ZSTD_d_format:
-            *value = (int)dctx->format;
-            return 0;
-        case ZSTD_d_stableOutBuffer:
-            *value = (int)dctx->outBufferMode;
-            return 0;
-        case ZSTD_d_forceIgnoreChecksum:
-            *value = (int)dctx->forceIgnoreChecksum;
-            return 0;
-        case ZSTD_d_refMultipleDDicts:
-            *value = (int)dctx->refMultipleDDicts;
-            return 0;
-        default:;
-    }
-    RETURN_ERROR(parameter_unsupported, "");
-}
-
-size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
-{
-    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
-    switch(dParam) {
-        case ZSTD_d_windowLogMax:
-            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
-            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
-            dctx->maxWindowSize = ((size_t)1) << value;
-            return 0;
-        case ZSTD_d_format:
-            CHECK_DBOUNDS(ZSTD_d_format, value);
-            dctx->format = (ZSTD_format_e)value;
-            return 0;
-        case ZSTD_d_stableOutBuffer:
-            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
-            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
-            return 0;
-        case ZSTD_d_forceIgnoreChecksum:
-            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
-            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
-            return 0;
-        case ZSTD_d_refMultipleDDicts:
-            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
-            if (dctx->staticSize != 0) {
-                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
-            }
-            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
-            return 0;
-        default:;
-    }
-    RETURN_ERROR(parameter_unsupported, "");
-}
-
-size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
-{
-    if ( (reset == ZSTD_reset_session_only)
-      || (reset == ZSTD_reset_session_and_parameters) ) {
-        dctx->streamStage = zdss_init;
-        dctx->noForwardProgress = 0;
-    }
-    if ( (reset == ZSTD_reset_parameters)
-      || (reset == ZSTD_reset_session_and_parameters) ) {
-        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
-        ZSTD_clearDict(dctx);
-        ZSTD_DCtx_resetParameters(dctx);
-    }
-    return 0;
-}
-
-
-size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
-{
-    return ZSTD_sizeof_DCtx(dctx);
-}
-
-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
-{
-    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
-    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
-    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
-    size_t const minRBSize = (size_t) neededSize;
-    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
-                    frameParameter_windowTooLarge, "");
-    return minRBSize;
-}
-
-size_t ZSTD_estimateDStreamSize(size_t windowSize)
-{
-    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-    size_t const inBuffSize = blockSize;  /* no block can be larger */
-    size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
-    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
-}
-
-size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
-{
-    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
-    ZSTD_frameHeader zfh;
-    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
-    if (ZSTD_isError(err)) return err;
-    RETURN_ERROR_IF(err>0, srcSize_wrong, "");
-    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
-                    frameParameter_windowTooLarge, "");
-    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
-}
-
-
-/* *****   Decompression   ***** */
-
-static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
-{
-    return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
-}
-
-static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
-{
-    if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
-        zds->oversizedDuration++;
-    else
-        zds->oversizedDuration = 0;
-}
-
-static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
-{
-    return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
-}
-
-/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
-static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
-{
-    ZSTD_outBuffer const expect = zds->expectedOutBuffer;
-    /* No requirement when ZSTD_obm_stable is not enabled. */
-    if (zds->outBufferMode != ZSTD_bm_stable)
-        return 0;
-    /* Any buffer is allowed in zdss_init, this must be the same for every other call until
-     * the context is reset.
-     */
-    if (zds->streamStage == zdss_init)
-        return 0;
-    /* The buffer must match our expectation exactly. */
-    if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
-        return 0;
-    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
-}
-
-/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
- * and updates the stage and the output buffer state. This call is extracted so it can be
- * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
- * NOTE: You must break after calling this function since the streamStage is modified.
- */
-static size_t ZSTD_decompressContinueStream(
-            ZSTD_DStream* zds, char** op, char* oend,
-            void const* src, size_t srcSize) {
-    int const isSkipFrame = ZSTD_isSkipFrame(zds);
-    if (zds->outBufferMode == ZSTD_bm_buffered) {
-        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
-        size_t const decodedSize = ZSTD_decompressContinue(zds,
-                zds->outBuff + zds->outStart, dstSize, src, srcSize);
-        FORWARD_IF_ERROR(decodedSize, "");
-        if (!decodedSize && !isSkipFrame) {
-            zds->streamStage = zdss_read;
-        } else {
-            zds->outEnd = zds->outStart + decodedSize;
-            zds->streamStage = zdss_flush;
-        }
-    } else {
-        /* Write directly into the output buffer */
-        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
-        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
-        FORWARD_IF_ERROR(decodedSize, "");
-        *op += decodedSize;
-        /* Flushing is not needed. */
-        zds->streamStage = zdss_read;
-        assert(*op <= oend);
-        assert(zds->outBufferMode == ZSTD_bm_stable);
-    }
-    return 0;
-}
-
-size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
-{
-    const char* const src = (const char*)input->src;
-    const char* const istart = input->pos != 0 ? src + input->pos : src;
-    const char* const iend = input->size != 0 ? src + input->size : src;
-    const char* ip = istart;
-    char* const dst = (char*)output->dst;
-    char* const ostart = output->pos != 0 ? dst + output->pos : dst;
-    char* const oend = output->size != 0 ? dst + output->size : dst;
-    char* op = ostart;
-    U32 someMoreWork = 1;
-
-    DEBUGLOG(5, "ZSTD_decompressStream");
-    RETURN_ERROR_IF(
-        input->pos > input->size,
-        srcSize_wrong,
-        "forbidden. in: pos: %u   vs size: %u",
-        (U32)input->pos, (U32)input->size);
-    RETURN_ERROR_IF(
-        output->pos > output->size,
-        dstSize_tooSmall,
-        "forbidden. out: pos: %u   vs size: %u",
-        (U32)output->pos, (U32)output->size);
-    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
-    FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
-
-    while (someMoreWork) {
-        switch(zds->streamStage)
-        {
-        case zdss_init :
-            DEBUGLOG(5, "stage zdss_init => transparent reset ");
-            zds->streamStage = zdss_loadHeader;
-            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-            zds->legacyVersion = 0;
-#endif
-            zds->hostageByte = 0;
-            zds->expectedOutBuffer = *output;
-            ZSTD_FALLTHROUGH;
-
-        case zdss_loadHeader :
-            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-            if (zds->legacyVersion) {
-                RETURN_ERROR_IF(zds->staticSize, memory_allocation,
-                    "legacy support is incompatible with static dctx");
-                {   size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
-                    if (hint==0) zds->streamStage = zdss_init;
-                    return hint;
-            }   }
-#endif
-            {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
-                if (zds->refMultipleDDicts && zds->ddictSet) {
-                    ZSTD_DCtx_selectFrameDDict(zds);
-                }
-                DEBUGLOG(5, "header size : %u", (U32)hSize);
-                if (ZSTD_isError(hSize)) {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-                    U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
-                    if (legacyVersion) {
-                        ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
-                        const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
-                        size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
-                        DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
-                        RETURN_ERROR_IF(zds->staticSize, memory_allocation,
-                            "legacy support is incompatible with static dctx");
-                        FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
-                                    zds->previousLegacyVersion, legacyVersion,
-                                    dict, dictSize), "");
-                        zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
-                        {   size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
-                            if (hint==0) zds->streamStage = zdss_init;   /* or stay in stage zdss_loadHeader */
-                            return hint;
-                    }   }
-#endif
-                    return hSize;   /* error */
-                }
-                if (hSize != 0) {   /* need more input */
-                    size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
-                    size_t const remainingInput = (size_t)(iend-ip);
-                    assert(iend >= ip);
-                    if (toLoad > remainingInput) {   /* not enough input to load full header */
-                        if (remainingInput > 0) {
-                            ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
-                            zds->lhSize += remainingInput;
-                        }
-                        input->pos = input->size;
-                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
-                    }
-                    assert(ip != NULL);
-                    ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
-                    break;
-            }   }
-
-            /* check for single-pass mode opportunity */
-            if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
-                && zds->fParams.frameType != ZSTD_skippableFrame
-                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
-                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
-                if (cSize <= (size_t)(iend-istart)) {
-                    /* shortcut : using single-pass mode */
-                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
-                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
-                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
-                    ip = istart + cSize;
-                    op += decompressedSize;
-                    zds->expected = 0;
-                    zds->streamStage = zdss_init;
-                    someMoreWork = 0;
-                    break;
-            }   }
-
-            /* Check output buffer is large enough for ZSTD_odm_stable. */
-            if (zds->outBufferMode == ZSTD_bm_stable
-                && zds->fParams.frameType != ZSTD_skippableFrame
-                && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
-                && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
-                RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
-            }
-
-            /* Consume header (see ZSTDds_decodeFrameHeader) */
-            DEBUGLOG(4, "Consume header");
-            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
-
-            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
-                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
-                zds->stage = ZSTDds_skipFrame;
-            } else {
-                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
-                zds->expected = ZSTD_blockHeaderSize;
-                zds->stage = ZSTDds_decodeBlockHeader;
-            }
-
-            /* control buffer memory usage */
-            DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
-                        (U32)(zds->fParams.windowSize >>10),
-                        (U32)(zds->maxWindowSize >> 10) );
-            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
-                            frameParameter_windowTooLarge, "");
-
-            /* Adapt buffer sizes to frame header instructions */
-            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
-                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
-                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
-                        : 0;
-
-                ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
-
-                {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
-                    int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
-
-                    if (tooSmall || tooLarge) {
-                        size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
-                        DEBUGLOG(4, "inBuff  : from %u to %u",
-                                    (U32)zds->inBuffSize, (U32)neededInBuffSize);
-                        DEBUGLOG(4, "outBuff : from %u to %u",
-                                    (U32)zds->outBuffSize, (U32)neededOutBuffSize);
-                        if (zds->staticSize) {  /* static DCtx */
-                            DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
-                            assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
-                            RETURN_ERROR_IF(
-                                bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
-                                memory_allocation, "");
-                        } else {
-                            ZSTD_customFree(zds->inBuff, zds->customMem);
-                            zds->inBuffSize = 0;
-                            zds->outBuffSize = 0;
-                            zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
-                            RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
-                        }
-                        zds->inBuffSize = neededInBuffSize;
-                        zds->outBuff = zds->inBuff + zds->inBuffSize;
-                        zds->outBuffSize = neededOutBuffSize;
-            }   }   }
-            zds->streamStage = zdss_read;
-            ZSTD_FALLTHROUGH;
-
-        case zdss_read:
-            DEBUGLOG(5, "stage zdss_read");
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
-                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
-                if (neededInSize==0) {  /* end of frame */
-                    zds->streamStage = zdss_init;
-                    someMoreWork = 0;
-                    break;
-                }
-                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
-                    FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
-                    ip += neededInSize;
-                    /* Function modifies the stage so we must break */
-                    break;
-            }   }
-            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
-            zds->streamStage = zdss_load;
-            ZSTD_FALLTHROUGH;
-
-        case zdss_load:
-            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
-                size_t const toLoad = neededInSize - zds->inPos;
-                int const isSkipFrame = ZSTD_isSkipFrame(zds);
-                size_t loadedSize;
-                /* At this point we shouldn't be decompressing a block that we can stream. */
-                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
-                if (isSkipFrame) {
-                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
-                } else {
-                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
-                                    corruption_detected,
-                                    "should never happen");
-                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
-                }
-                ip += loadedSize;
-                zds->inPos += loadedSize;
-                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
-
-                /* decode loaded input */
-                zds->inPos = 0;   /* input is consumed */
-                FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
-                /* Function modifies the stage so we must break */
-                break;
-            }
-        case zdss_flush:
-            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
-                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
-                op += flushedSize;
-                zds->outStart += flushedSize;
-                if (flushedSize == toFlushSize) {  /* flush completed */
-                    zds->streamStage = zdss_read;
-                    if ( (zds->outBuffSize < zds->fParams.frameContentSize)
-                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
-                        DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
-                                (int)(zds->outBuffSize - zds->outStart),
-                                (U32)zds->fParams.blockSizeMax);
-                        zds->outStart = zds->outEnd = 0;
-                    }
-                    break;
-            }   }
-            /* cannot complete flush */
-            someMoreWork = 0;
-            break;
-
-        default:
-            assert(0);    /* impossible */
-            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
-    }   }
-
-    /* result */
-    input->pos = (size_t)(ip - (const char*)(input->src));
-    output->pos = (size_t)(op - (char*)(output->dst));
-
-    /* Update the expected output buffer for ZSTD_obm_stable. */
-    zds->expectedOutBuffer = *output;
-
-    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
-        zds->noForwardProgress ++;
-        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
-            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
-            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
-            assert(0);
-        }
-    } else {
-        zds->noForwardProgress = 0;
-    }
-    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
-        if (!nextSrcSizeHint) {   /* frame fully decoded */
-            if (zds->outEnd == zds->outStart) {  /* output fully flushed */
-                if (zds->hostageByte) {
-                    if (input->pos >= input->size) {
-                        /* can't release hostage (not present) */
-                        zds->streamStage = zdss_read;
-                        return 1;
-                    }
-                    input->pos++;  /* release hostage */
-                }   /* zds->hostageByte */
-                return 0;
-            }  /* zds->outEnd == zds->outStart */
-            if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
-                input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
-                zds->hostageByte=1;
-            }
-            return 1;
-        }  /* nextSrcSizeHint==0 */
-        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
-        assert(zds->inPos <= nextSrcSizeHint);
-        nextSrcSizeHint -= zds->inPos;   /* part already loaded*/
-        return nextSrcSizeHint;
-    }
-}
-
-size_t ZSTD_decompressStream_simpleArgs (
-                            ZSTD_DCtx* dctx,
-                            void* dst, size_t dstCapacity, size_t* dstPos,
-                      const void* src, size_t srcSize, size_t* srcPos)
-{
-    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
-    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
-    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
-    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
-    *dstPos = output.pos;
-    *srcPos = input.pos;
-    return cErr;
-}
diff --git a/dep/zstd/lib/decompress/zstd_decompress_block.c b/dep/zstd/lib/decompress/zstd_decompress_block.c
deleted file mode 100644
index 2e44d30d2..000000000
--- a/dep/zstd/lib/decompress/zstd_decompress_block.c
+++ /dev/null
@@ -1,2072 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* zstd_decompress_block :
- * this module takes care of decompressing _compressed_ block */
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-#include "../common/compiler.h"    /* prefetch */
-#include "../common/cpu.h"         /* bmi2 */
-#include "../common/mem.h"         /* low level memory routines */
-#define FSE_STATIC_LINKING_ONLY
-#include "../common/fse.h"
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"
-#include "../common/zstd_internal.h"
-#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
-#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
-#include "zstd_decompress_block.h"
-
-/*_*******************************************************
-*  Macros
-**********************************************************/
-
-/* These two optional macros force the use one way or another of the two
- * ZSTD_decompressSequences implementations. You can't force in both directions
- * at the same time.
- */
-#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
-#endif
-
-
-/*_*******************************************************
-*  Memory operations
-**********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
-
-
-/*-*************************************************************
- *   Block decoding
- ***************************************************************/
-
-/*! ZSTD_getcBlockSize() :
- *  Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
-                          blockProperties_t* bpPtr)
-{
-    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
-
-    {   U32 const cBlockHeader = MEM_readLE24(src);
-        U32 const cSize = cBlockHeader >> 3;
-        bpPtr->lastBlock = cBlockHeader & 1;
-        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
-        bpPtr->origSize = cSize;   /* only useful for RLE */
-        if (bpPtr->blockType == bt_rle) return 1;
-        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
-        return cSize;
-    }
-}
-
-/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
-static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
-    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
-{
-    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
-    {
-        /* room for litbuffer to fit without read faulting */
-        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
-        dctx->litBufferEnd = dctx->litBuffer + litSize;
-        dctx->litBufferLocation = ZSTD_in_dst;
-    }
-    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
-    {
-        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
-        if (splitImmediately) {
-            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
-            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
-            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
-        }
-        else {
-            /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
-            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
-            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
-        }
-        dctx->litBufferLocation = ZSTD_split;
-    }
-    else
-    {
-        /* fits entirely within litExtraBuffer, so no split is necessary */
-        dctx->litBuffer = dctx->litExtraBuffer;
-        dctx->litBufferEnd = dctx->litBuffer + litSize;
-        dctx->litBufferLocation = ZSTD_not_in_dst;
-    }
-}
-
-/* Hidden declaration for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize,
-                          void* dst, size_t dstCapacity, const streaming_operation streaming);
-/*! ZSTD_decodeLiteralsBlock() :
- * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
- * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
- * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
- * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
- *
- * @return : nb of bytes read from src (< srcSize )
- *  note : symbol not declared but exposed for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
-                          void* dst, size_t dstCapacity, const streaming_operation streaming)
-{
-    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
-    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
-
-    {   const BYTE* const istart = (const BYTE*) src;
-        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
-
-        switch(litEncType)
-        {
-        case set_repeat:
-            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
-            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
-            ZSTD_FALLTHROUGH;
-
-        case set_compressed:
-            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
-            {   size_t lhSize, litSize, litCSize;
-                U32 singleStream=0;
-                U32 const lhlCode = (istart[0] >> 2) & 3;
-                U32 const lhc = MEM_readLE32(istart);
-                size_t hufSuccess;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-                switch(lhlCode)
-                {
-                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
-                    /* 2 - 2 - 10 - 10 */
-                    singleStream = !lhlCode;
-                    lhSize = 3;
-                    litSize  = (lhc >> 4) & 0x3FF;
-                    litCSize = (lhc >> 14) & 0x3FF;
-                    break;
-                case 2:
-                    /* 2 - 2 - 14 - 14 */
-                    lhSize = 4;
-                    litSize  = (lhc >> 4) & 0x3FFF;
-                    litCSize = lhc >> 18;
-                    break;
-                case 3:
-                    /* 2 - 2 - 18 - 18 */
-                    lhSize = 5;
-                    litSize  = (lhc >> 4) & 0x3FFFF;
-                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
-                    break;
-                }
-                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
-                RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
-                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
-
-                /* prefetch huffman table if cold */
-                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
-                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
-                }
-
-                if (litEncType==set_repeat) {
-                    if (singleStream) {
-                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
-                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
-                    } else {
-                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
-                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
-                    }
-                } else {
-                    if (singleStream) {
-#if defined(HUF_FORCE_DECOMPRESS_X2)
-                        hufSuccess = HUF_decompress1X_DCtx_wksp(
-                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                            istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace));
-#else
-                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
-                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                            istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
-#endif
-                    } else {
-                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
-                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
-                            istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
-                    }
-                }
-                if (dctx->litBufferLocation == ZSTD_split)
-                {
-                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
-                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
-                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
-                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
-                }
-
-                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
-
-                dctx->litPtr = dctx->litBuffer;
-                dctx->litSize = litSize;
-                dctx->litEntropy = 1;
-                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-                return litCSize + lhSize;
-            }
-
-        case set_basic:
-            {   size_t litSize, lhSize;
-                U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-                switch(lhlCode)
-                {
-                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
-                    lhSize = 1;
-                    litSize = istart[0] >> 3;
-                    break;
-                case 1:
-                    lhSize = 2;
-                    litSize = MEM_readLE16(istart) >> 4;
-                    break;
-                case 3:
-                    lhSize = 3;
-                    litSize = MEM_readLE24(istart) >> 4;
-                    break;
-                }
-
-                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
-                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
-                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
-                    if (dctx->litBufferLocation == ZSTD_split)
-                    {
-                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
-                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
-                    }
-                    else
-                    {
-                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
-                    }
-                    dctx->litPtr = dctx->litBuffer;
-                    dctx->litSize = litSize;
-                    return lhSize+litSize;
-                }
-                /* direct reference into compressed stream */
-                dctx->litPtr = istart+lhSize;
-                dctx->litSize = litSize;
-                dctx->litBufferEnd = dctx->litPtr + litSize;
-                dctx->litBufferLocation = ZSTD_not_in_dst;
-                return lhSize+litSize;
-            }
-
-        case set_rle:
-            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                size_t litSize, lhSize;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-                switch(lhlCode)
-                {
-                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
-                    lhSize = 1;
-                    litSize = istart[0] >> 3;
-                    break;
-                case 1:
-                    lhSize = 2;
-                    litSize = MEM_readLE16(istart) >> 4;
-                    break;
-                case 3:
-                    lhSize = 3;
-                    litSize = MEM_readLE24(istart) >> 4;
-                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
-                    break;
-                }
-                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
-                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
-                if (dctx->litBufferLocation == ZSTD_split)
-                {
-                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
-                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
-                }
-                else
-                {
-                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
-                }
-                dctx->litPtr = dctx->litBuffer;
-                dctx->litSize = litSize;
-                return lhSize+1;
-            }
-        default:
-            RETURN_ERROR(corruption_detected, "impossible");
-        }
-    }
-}
-
-/* Default FSE distribution tables.
- * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
- * They were generated programmatically with following method :
- * - start from default distributions, present in /lib/common/zstd_internal.h
- * - generate tables normally, using ZSTD_buildFSETable()
- * - printout the content of tables
- * - pretify output, report below, test with fuzzer to ensure it's correct */
-
-/* Default FSE distribution table for Literal Lengths */
-static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
-     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
-     /* nextState, nbAddBits, nbBits, baseVal */
-     {  0,  0,  4,    0},  { 16,  0,  4,    0},
-     { 32,  0,  5,    1},  {  0,  0,  5,    3},
-     {  0,  0,  5,    4},  {  0,  0,  5,    6},
-     {  0,  0,  5,    7},  {  0,  0,  5,    9},
-     {  0,  0,  5,   10},  {  0,  0,  5,   12},
-     {  0,  0,  6,   14},  {  0,  1,  5,   16},
-     {  0,  1,  5,   20},  {  0,  1,  5,   22},
-     {  0,  2,  5,   28},  {  0,  3,  5,   32},
-     {  0,  4,  5,   48},  { 32,  6,  5,   64},
-     {  0,  7,  5,  128},  {  0,  8,  6,  256},
-     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
-     { 32,  0,  4,    0},  {  0,  0,  4,    1},
-     {  0,  0,  5,    2},  { 32,  0,  5,    4},
-     {  0,  0,  5,    5},  { 32,  0,  5,    7},
-     {  0,  0,  5,    8},  { 32,  0,  5,   10},
-     {  0,  0,  5,   11},  {  0,  0,  6,   13},
-     { 32,  1,  5,   16},  {  0,  1,  5,   18},
-     { 32,  1,  5,   22},  {  0,  2,  5,   24},
-     { 32,  3,  5,   32},  {  0,  3,  5,   40},
-     {  0,  6,  4,   64},  { 16,  6,  4,   64},
-     { 32,  7,  5,  128},  {  0,  9,  6,  512},
-     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
-     { 16,  0,  4,    1},  { 32,  0,  5,    2},
-     { 32,  0,  5,    3},  { 32,  0,  5,    5},
-     { 32,  0,  5,    6},  { 32,  0,  5,    8},
-     { 32,  0,  5,    9},  { 32,  0,  5,   11},
-     { 32,  0,  5,   12},  {  0,  0,  6,   15},
-     { 32,  1,  5,   18},  { 32,  1,  5,   20},
-     { 32,  2,  5,   24},  { 32,  2,  5,   28},
-     { 32,  3,  5,   40},  { 32,  4,  5,   48},
-     {  0, 16,  6,65536},  {  0, 15,  6,32768},
-     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
-};   /* LL_defaultDTable */
-
-/* Default FSE distribution table for Offset Codes */
-static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
-    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
-    /* nextState, nbAddBits, nbBits, baseVal */
-    {  0,  0,  5,    0},     {  0,  6,  4,   61},
-    {  0,  9,  5,  509},     {  0, 15,  5,32765},
-    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
-    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
-    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
-    {  0,  5,  5,   29},     {  0,  8,  4,  253},
-    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
-    {  0,  2,  5,    1},     { 16,  7,  4,  125},
-    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
-    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
-    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
-    {  0, 19,  5,524285},    {  0,  1,  5,    1},
-    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
-    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
-    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
-    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
-};   /* OF_defaultDTable */
-
-
-/* Default FSE distribution table for Match Lengths */
-static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
-    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
-    /* nextState, nbAddBits, nbBits, baseVal */
-    {  0,  0,  6,    3},  {  0,  0,  4,    4},
-    { 32,  0,  5,    5},  {  0,  0,  5,    6},
-    {  0,  0,  5,    8},  {  0,  0,  5,    9},
-    {  0,  0,  5,   11},  {  0,  0,  6,   13},
-    {  0,  0,  6,   16},  {  0,  0,  6,   19},
-    {  0,  0,  6,   22},  {  0,  0,  6,   25},
-    {  0,  0,  6,   28},  {  0,  0,  6,   31},
-    {  0,  0,  6,   34},  {  0,  1,  6,   37},
-    {  0,  1,  6,   41},  {  0,  2,  6,   47},
-    {  0,  3,  6,   59},  {  0,  4,  6,   83},
-    {  0,  7,  6,  131},  {  0,  9,  6,  515},
-    { 16,  0,  4,    4},  {  0,  0,  4,    5},
-    { 32,  0,  5,    6},  {  0,  0,  5,    7},
-    { 32,  0,  5,    9},  {  0,  0,  5,   10},
-    {  0,  0,  6,   12},  {  0,  0,  6,   15},
-    {  0,  0,  6,   18},  {  0,  0,  6,   21},
-    {  0,  0,  6,   24},  {  0,  0,  6,   27},
-    {  0,  0,  6,   30},  {  0,  0,  6,   33},
-    {  0,  1,  6,   35},  {  0,  1,  6,   39},
-    {  0,  2,  6,   43},  {  0,  3,  6,   51},
-    {  0,  4,  6,   67},  {  0,  5,  6,   99},
-    {  0,  8,  6,  259},  { 32,  0,  4,    4},
-    { 48,  0,  4,    4},  { 16,  0,  4,    5},
-    { 32,  0,  5,    7},  { 32,  0,  5,    8},
-    { 32,  0,  5,   10},  { 32,  0,  5,   11},
-    {  0,  0,  6,   14},  {  0,  0,  6,   17},
-    {  0,  0,  6,   20},  {  0,  0,  6,   23},
-    {  0,  0,  6,   26},  {  0,  0,  6,   29},
-    {  0,  0,  6,   32},  {  0, 16,  6,65539},
-    {  0, 15,  6,32771},  {  0, 14,  6,16387},
-    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
-    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
-};   /* ML_defaultDTable */
-
-
-static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
-{
-    void* ptr = dt;
-    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
-    ZSTD_seqSymbol* const cell = dt + 1;
-
-    DTableH->tableLog = 0;
-    DTableH->fastMode = 0;
-
-    cell->nbBits = 0;
-    cell->nextState = 0;
-    assert(nbAddBits < 255);
-    cell->nbAdditionalBits = nbAddBits;
-    cell->baseValue = baseValue;
-}
-
-
-/* ZSTD_buildFSETable() :
- * generate FSE decoding table for one symbol (ll, ml or off)
- * cannot fail if input is valid =>
- * all inputs are presumed validated at this stage */
-FORCE_INLINE_TEMPLATE
-void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
-            const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U8* nbAdditionalBits,
-            unsigned tableLog, void* wksp, size_t wkspSize)
-{
-    ZSTD_seqSymbol* const tableDecode = dt+1;
-    U32 const maxSV1 = maxSymbolValue + 1;
-    U32 const tableSize = 1 << tableLog;
-
-    U16* symbolNext = (U16*)wksp;
-    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
-    U32 highThreshold = tableSize - 1;
-
-
-    /* Sanity Checks */
-    assert(maxSymbolValue <= MaxSeq);
-    assert(tableLog <= MaxFSELog);
-    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
-    (void)wkspSize;
-    /* Init, lay down lowprob symbols */
-    {   ZSTD_seqSymbol_header DTableH;
-        DTableH.tableLog = tableLog;
-        DTableH.fastMode = 1;
-        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
-            U32 s;
-            for (s=0; s<maxSV1; s++) {
-                if (normalizedCounter[s]==-1) {
-                    tableDecode[highThreshold--].baseValue = s;
-                    symbolNext[s] = 1;
-                } else {
-                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    assert(normalizedCounter[s]>=0);
-                    symbolNext[s] = (U16)normalizedCounter[s];
-        }   }   }
-        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
-    }
-
-    /* Spread symbols */
-    assert(tableSize <= 512);
-    /* Specialized symbol spreading for the case when there are
-     * no low probability (-1 count) symbols. When compressing
-     * small blocks we avoid low probability symbols to hit this
-     * case, since header decoding speed matters more.
-     */
-    if (highThreshold == tableSize - 1) {
-        size_t const tableMask = tableSize-1;
-        size_t const step = FSE_TABLESTEP(tableSize);
-        /* First lay down the symbols in order.
-         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
-         * misses since small blocks generally have small table logs, so nearly
-         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
-         * our buffer to handle the over-write.
-         */
-        {
-            U64 const add = 0x0101010101010101ull;
-            size_t pos = 0;
-            U64 sv = 0;
-            U32 s;
-            for (s=0; s<maxSV1; ++s, sv += add) {
-                int i;
-                int const n = normalizedCounter[s];
-                MEM_write64(spread + pos, sv);
-                for (i = 8; i < n; i += 8) {
-                    MEM_write64(spread + pos + i, sv);
-                }
-                pos += n;
-            }
-        }
-        /* Now we spread those positions across the table.
-         * The benefit of doing it in two stages is that we avoid the the
-         * variable size inner loop, which caused lots of branch misses.
-         * Now we can run through all the positions without any branch misses.
-         * We unroll the loop twice, since that is what emperically worked best.
-         */
-        {
-            size_t position = 0;
-            size_t s;
-            size_t const unroll = 2;
-            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
-            for (s = 0; s < (size_t)tableSize; s += unroll) {
-                size_t u;
-                for (u = 0; u < unroll; ++u) {
-                    size_t const uPosition = (position + (u * step)) & tableMask;
-                    tableDecode[uPosition].baseValue = spread[s + u];
-                }
-                position = (position + (unroll * step)) & tableMask;
-            }
-            assert(position == 0);
-        }
-    } else {
-        U32 const tableMask = tableSize-1;
-        U32 const step = FSE_TABLESTEP(tableSize);
-        U32 s, position = 0;
-        for (s=0; s<maxSV1; s++) {
-            int i;
-            int const n = normalizedCounter[s];
-            for (i=0; i<n; i++) {
-                tableDecode[position].baseValue = s;
-                position = (position + step) & tableMask;
-                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
-        }   }
-        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-    }
-
-    /* Build Decoding table */
-    {
-        U32 u;
-        for (u=0; u<tableSize; u++) {
-            U32 const symbol = tableDecode[u].baseValue;
-            U32 const nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
-            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
-            assert(nbAdditionalBits[symbol] < 255);
-            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
-            tableDecode[u].baseValue = baseValue[symbol];
-        }
-    }
-}
-
-/* Avoids the FORCE_INLINE of the _body() function. */
-static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
-            const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U8* nbAdditionalBits,
-            unsigned tableLog, void* wksp, size_t wkspSize)
-{
-    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
-            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
-}
-
-#if DYNAMIC_BMI2
-BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
-            const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U8* nbAdditionalBits,
-            unsigned tableLog, void* wksp, size_t wkspSize)
-{
-    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
-            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
-}
-#endif
-
-void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
-            const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U8* nbAdditionalBits,
-            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
-{
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
-                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
-        return;
-    }
-#endif
-    (void)bmi2;
-    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
-            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
-}
-
-
-/*! ZSTD_buildSeqTable() :
- * @return : nb bytes read from src,
- *           or an error code if it fails */
-static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
-                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
-                                 const void* src, size_t srcSize,
-                                 const U32* baseValue, const U8* nbAdditionalBits,
-                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
-                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
-                                 int bmi2)
-{
-    switch(type)
-    {
-    case set_rle :
-        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
-        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
-        {   U32 const symbol = *(const BYTE*)src;
-            U32 const baseline = baseValue[symbol];
-            U8 const nbBits = nbAdditionalBits[symbol];
-            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
-        }
-        *DTablePtr = DTableSpace;
-        return 1;
-    case set_basic :
-        *DTablePtr = defaultTable;
-        return 0;
-    case set_repeat:
-        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
-        /* prefetch FSE table if used */
-        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
-            const void* const pStart = *DTablePtr;
-            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
-            PREFETCH_AREA(pStart, pSize);
-        }
-        return 0;
-    case set_compressed :
-        {   unsigned tableLog;
-            S16 norm[MaxSeq+1];
-            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
-            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
-            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
-            *DTablePtr = DTableSpace;
-            return headerSize;
-        }
-    default :
-        assert(0);
-        RETURN_ERROR(GENERIC, "impossible");
-    }
-}
-
-size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
-                             const void* src, size_t srcSize)
-{
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* ip = istart;
-    int nbSeq;
-    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
-
-    /* check */
-    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
-
-    /* SeqHead */
-    nbSeq = *ip++;
-    if (!nbSeq) {
-        *nbSeqPtr=0;
-        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
-        return 1;
-    }
-    if (nbSeq > 0x7F) {
-        if (nbSeq == 0xFF) {
-            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
-            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
-            ip+=2;
-        } else {
-            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
-            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
-        }
-    }
-    *nbSeqPtr = nbSeq;
-
-    /* FSE table descriptors */
-    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
-    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
-        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
-        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
-        ip++;
-
-        /* Build DTables */
-        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
-                                                      LLtype, MaxLL, LLFSELog,
-                                                      ip, iend-ip,
-                                                      LL_base, LL_bits,
-                                                      LL_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq,
-                                                      dctx->workspace, sizeof(dctx->workspace),
-                                                      ZSTD_DCtx_get_bmi2(dctx));
-            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
-            ip += llhSize;
-        }
-
-        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
-                                                      OFtype, MaxOff, OffFSELog,
-                                                      ip, iend-ip,
-                                                      OF_base, OF_bits,
-                                                      OF_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq,
-                                                      dctx->workspace, sizeof(dctx->workspace),
-                                                      ZSTD_DCtx_get_bmi2(dctx));
-            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
-            ip += ofhSize;
-        }
-
-        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
-                                                      MLtype, MaxML, MLFSELog,
-                                                      ip, iend-ip,
-                                                      ML_base, ML_bits,
-                                                      ML_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq,
-                                                      dctx->workspace, sizeof(dctx->workspace),
-                                                      ZSTD_DCtx_get_bmi2(dctx));
-            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
-            ip += mlhSize;
-        }
-    }
-
-    return ip-istart;
-}
-
-
-typedef struct {
-    size_t litLength;
-    size_t matchLength;
-    size_t offset;
-} seq_t;
-
-typedef struct {
-    size_t state;
-    const ZSTD_seqSymbol* table;
-} ZSTD_fseState;
-
-typedef struct {
-    BIT_DStream_t DStream;
-    ZSTD_fseState stateLL;
-    ZSTD_fseState stateOffb;
-    ZSTD_fseState stateML;
-    size_t prevOffset[ZSTD_REP_NUM];
-} seqState_t;
-
-/*! ZSTD_overlapCopy8() :
- *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
- *  If the offset is < 8 then the offset is spread to at least 8 bytes.
- *
- *  Precondition: *ip <= *op
- *  Postcondition: *op - *op >= 8
- */
-HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
-    assert(*ip <= *op);
-    if (offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[offset];
-        (*op)[0] = (*ip)[0];
-        (*op)[1] = (*ip)[1];
-        (*op)[2] = (*ip)[2];
-        (*op)[3] = (*ip)[3];
-        *ip += dec32table[offset];
-        ZSTD_copy4(*op+4, *ip);
-        *ip -= sub2;
-    } else {
-        ZSTD_copy8(*op, *ip);
-    }
-    *ip += 8;
-    *op += 8;
-    assert(*op - *ip >= 8);
-}
-
-/*! ZSTD_safecopy() :
- *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
- *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
- *  This function is only called in the uncommon case where the sequence is near the end of the block. It
- *  should be fast for a single long sequence, but can be slow for several short sequences.
- *
- *  @param ovtype controls the overlap detection
- *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
- *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
- *           The src buffer must be before the dst buffer.
- */
-static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
-    ptrdiff_t const diff = op - ip;
-    BYTE* const oend = op + length;
-
-    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
-           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
-
-    if (length < 8) {
-        /* Handle short lengths. */
-        while (op < oend) *op++ = *ip++;
-        return;
-    }
-    if (ovtype == ZSTD_overlap_src_before_dst) {
-        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
-        assert(length >= 8);
-        ZSTD_overlapCopy8(&op, &ip, diff);
-        length -= 8;
-        assert(op - ip >= 8);
-        assert(op <= oend);
-    }
-
-    if (oend <= oend_w) {
-        /* No risk of overwrite. */
-        ZSTD_wildcopy(op, ip, length, ovtype);
-        return;
-    }
-    if (op <= oend_w) {
-        /* Wildcopy until we get close to the end. */
-        assert(oend > oend_w);
-        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
-        ip += oend_w - op;
-        op += oend_w - op;
-    }
-    /* Handle the leftovers. */
-    while (op < oend) *op++ = *ip++;
-}
-
-/* ZSTD_safecopyDstBeforeSrc():
- * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
- * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
-static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
-    ptrdiff_t const diff = op - ip;
-    BYTE* const oend = op + length;
-
-    if (length < 8 || diff > -8) {
-        /* Handle short lengths, close overlaps, and dst not before src. */
-        while (op < oend) *op++ = *ip++;
-        return;
-    }
-
-    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
-        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
-        ip += oend - WILDCOPY_OVERLENGTH - op;
-        op += oend - WILDCOPY_OVERLENGTH - op;
-    }
-
-    /* Handle the leftovers. */
-    while (op < oend) *op++ = *ip++;
-}
-
-/* ZSTD_execSequenceEnd():
- * This version handles cases that are near the end of the output buffer. It requires
- * more careful checks to make sure there is no overflow. By separating out these hard
- * and unlikely cases, we can speed up the common cases.
- *
- * NOTE: This function needs to be fast for a single long sequence, but doesn't need
- * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
- */
-FORCE_NOINLINE
-size_t ZSTD_execSequenceEnd(BYTE* op,
-    BYTE* const oend, seq_t sequence,
-    const BYTE** litPtr, const BYTE* const litLimit,
-    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = oLitEnd - sequence.offset;
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-
-    /* bounds checks : careful of address space overflow in 32-bit mode */
-    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
-    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
-    assert(op < op + sequenceLength);
-    assert(oLitEnd < op + sequenceLength);
-
-    /* copy literals */
-    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
-    op = oLitEnd;
-    *litPtr = iLitEnd;
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
-        match = dictEnd - (prefixStart - match);
-        if (match + sequence.matchLength <= dictEnd) {
-            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-        ZSTD_memmove(oLitEnd, match, length1);
-        op = oLitEnd + length1;
-        sequence.matchLength -= length1;
-        match = prefixStart;
-        }
-    }
-    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
-    return sequenceLength;
-}
-
-/* ZSTD_execSequenceEndSplitLitBuffer():
- * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
- */
-FORCE_NOINLINE
-size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
-    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
-    const BYTE** litPtr, const BYTE* const litLimit,
-    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = oLitEnd - sequence.offset;
-
-
-    /* bounds checks : careful of address space overflow in 32-bit mode */
-    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
-    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
-    assert(op < op + sequenceLength);
-    assert(oLitEnd < op + sequenceLength);
-
-    /* copy literals */
-    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
-    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
-    op = oLitEnd;
-    *litPtr = iLitEnd;
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
-        match = dictEnd - (prefixStart - match);
-        if (match + sequence.matchLength <= dictEnd) {
-            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-        ZSTD_memmove(oLitEnd, match, length1);
-        op = oLitEnd + length1;
-        sequence.matchLength -= length1;
-        match = prefixStart;
-        }
-    }
-    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
-    return sequenceLength;
-}
-
-HINT_INLINE
-size_t ZSTD_execSequence(BYTE* op,
-    BYTE* const oend, seq_t sequence,
-    const BYTE** litPtr, const BYTE* const litLimit,
-    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = oLitEnd - sequence.offset;
-
-    assert(op != NULL /* Precondition */);
-    assert(oend_w < oend /* No underflow */);
-    /* Handle edge cases in a slow path:
-     *   - Read beyond end of literals
-     *   - Match end is within WILDCOPY_OVERLIMIT of oend
-     *   - 32-bit mode and the match length overflows
-     */
-    if (UNLIKELY(
-        iLitEnd > litLimit ||
-        oMatchEnd > oend_w ||
-        (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
-        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
-    assert(op <= oLitEnd /* No overflow */);
-    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
-    assert(oMatchEnd <= oend /* No underflow */);
-    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
-    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
-    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
-
-    /* Copy Literals:
-     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
-     * We likely don't need the full 32-byte wildcopy.
-     */
-    assert(WILDCOPY_OVERLENGTH >= 16);
-    ZSTD_copy16(op, (*litPtr));
-    if (UNLIKELY(sequence.litLength > 16)) {
-        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
-    }
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* Copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix -> go into extDict */
-        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
-        match = dictEnd + (match - prefixStart);
-        if (match + sequence.matchLength <= dictEnd) {
-            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-        ZSTD_memmove(oLitEnd, match, length1);
-        op = oLitEnd + length1;
-        sequence.matchLength -= length1;
-        match = prefixStart;
-        }
-    }
-    /* Match within prefix of 1 or more bytes */
-    assert(op <= oMatchEnd);
-    assert(oMatchEnd <= oend_w);
-    assert(match >= prefixStart);
-    assert(sequence.matchLength >= 1);
-
-    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
-     * without overlap checking.
-     */
-    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
-        /* We bet on a full wildcopy for matches, since we expect matches to be
-         * longer than literals (in general). In silesia, ~10% of matches are longer
-         * than 16 bytes.
-         */
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
-        return sequenceLength;
-    }
-    assert(sequence.offset < WILDCOPY_VECLEN);
-
-    /* Copy 8 bytes and spread the offset to be >= 8. */
-    ZSTD_overlapCopy8(&op, &match, sequence.offset);
-
-    /* If the match length is > 8 bytes, then continue with the wildcopy. */
-    if (sequence.matchLength > 8) {
-        assert(op < oMatchEnd);
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
-    }
-    return sequenceLength;
-}
-
-HINT_INLINE
-size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
-    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
-    const BYTE** litPtr, const BYTE* const litLimit,
-    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = oLitEnd - sequence.offset;
-
-    assert(op != NULL /* Precondition */);
-    assert(oend_w < oend /* No underflow */);
-    /* Handle edge cases in a slow path:
-     *   - Read beyond end of literals
-     *   - Match end is within WILDCOPY_OVERLIMIT of oend
-     *   - 32-bit mode and the match length overflows
-     */
-    if (UNLIKELY(
-            iLitEnd > litLimit ||
-            oMatchEnd > oend_w ||
-            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
-        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
-    assert(op <= oLitEnd /* No overflow */);
-    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
-    assert(oMatchEnd <= oend /* No underflow */);
-    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
-    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
-    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
-
-    /* Copy Literals:
-     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
-     * We likely don't need the full 32-byte wildcopy.
-     */
-    assert(WILDCOPY_OVERLENGTH >= 16);
-    ZSTD_copy16(op, (*litPtr));
-    if (UNLIKELY(sequence.litLength > 16)) {
-        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
-    }
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* Copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix -> go into extDict */
-        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
-        match = dictEnd + (match - prefixStart);
-        if (match + sequence.matchLength <= dictEnd) {
-            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            ZSTD_memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-    }   }
-    /* Match within prefix of 1 or more bytes */
-    assert(op <= oMatchEnd);
-    assert(oMatchEnd <= oend_w);
-    assert(match >= prefixStart);
-    assert(sequence.matchLength >= 1);
-
-    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
-     * without overlap checking.
-     */
-    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
-        /* We bet on a full wildcopy for matches, since we expect matches to be
-         * longer than literals (in general). In silesia, ~10% of matches are longer
-         * than 16 bytes.
-         */
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
-        return sequenceLength;
-    }
-    assert(sequence.offset < WILDCOPY_VECLEN);
-
-    /* Copy 8 bytes and spread the offset to be >= 8. */
-    ZSTD_overlapCopy8(&op, &match, sequence.offset);
-
-    /* If the match length is > 8 bytes, then continue with the wildcopy. */
-    if (sequence.matchLength > 8) {
-        assert(op < oMatchEnd);
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
-    }
-    return sequenceLength;
-}
-
-
-static void
-ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
-{
-    const void* ptr = dt;
-    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
-    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
-    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
-                (U32)DStatePtr->state, DTableH->tableLog);
-    BIT_reloadDStream(bitD);
-    DStatePtr->table = dt + 1;
-}
-
-FORCE_INLINE_TEMPLATE void
-ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
-{
-    size_t const lowBits = BIT_readBits(bitD, nbBits);
-    DStatePtr->state = nextState + lowBits;
-}
-
-/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
- * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offsets.
- */
-#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
-    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
-        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
-        : 0)
-
-typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-
-FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-{
-    seq_t seq;
-    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
-    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
-    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
-    seq.matchLength = mlDInfo->baseValue;
-    seq.litLength = llDInfo->baseValue;
-    {   U32 const ofBase = ofDInfo->baseValue;
-        BYTE const llBits = llDInfo->nbAdditionalBits;
-        BYTE const mlBits = mlDInfo->nbAdditionalBits;
-        BYTE const ofBits = ofDInfo->nbAdditionalBits;
-        BYTE const totalBits = llBits+mlBits+ofBits;
-
-        U16 const llNext = llDInfo->nextState;
-        U16 const mlNext = mlDInfo->nextState;
-        U16 const ofNext = ofDInfo->nextState;
-        U32 const llnbBits = llDInfo->nbBits;
-        U32 const mlnbBits = mlDInfo->nbBits;
-        U32 const ofnbBits = ofDInfo->nbBits;
-        /*
-         * As gcc has better branch and block analyzers, sometimes it is only
-         * valuable to mark likelyness for clang, it gives around 3-4% of
-         * performance.
-         */
-
-        /* sequence */
-        {   size_t offset;
-    #if defined(__clang__)
-            if (LIKELY(ofBits > 1)) {
-    #else
-            if (ofBits > 1) {
-    #endif
-                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
-                assert(ofBits <= MaxOff);
-                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
-                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
-                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                    BIT_reloadDStream(&seqState->DStream);
-                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
-                } else {
-                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-                }
-                seqState->prevOffset[2] = seqState->prevOffset[1];
-                seqState->prevOffset[1] = seqState->prevOffset[0];
-                seqState->prevOffset[0] = offset;
-            } else {
-                U32 const ll0 = (llDInfo->baseValue == 0);
-                if (LIKELY((ofBits == 0))) {
-                    offset = seqState->prevOffset[ll0];
-                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
-                    seqState->prevOffset[0] = offset;
-                } else {
-                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
-                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
-                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                        seqState->prevOffset[1] = seqState->prevOffset[0];
-                        seqState->prevOffset[0] = offset = temp;
-            }   }   }
-            seq.offset = offset;
-        }
-
-    #if defined(__clang__)
-        if (UNLIKELY(mlBits > 0))
-    #else
-        if (mlBits > 0)
-    #endif
-            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
-
-        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
-            BIT_reloadDStream(&seqState->DStream);
-        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
-            BIT_reloadDStream(&seqState->DStream);
-        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
-        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
-    #if defined(__clang__)
-        if (UNLIKELY(llBits > 0))
-    #else
-        if (llBits > 0)
-    #endif
-            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
-
-        if (MEM_32bits())
-            BIT_reloadDStream(&seqState->DStream);
-
-        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
-                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-
-        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
-        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
-        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
-    }
-
-    return seq;
-}
-
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
-{
-    size_t const windowSize = dctx->fParams.windowSize;
-    /* No dictionary used. */
-    if (dctx->dictContentEndForFuzzing == NULL) return 0;
-    /* Dictionary is our prefix. */
-    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
-    /* Dictionary is not our ext-dict. */
-    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
-    /* Dictionary is not within our window size. */
-    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
-    /* Dictionary is active. */
-    return 1;
-}
-
-MEM_STATIC void ZSTD_assertValidSequence(
-        ZSTD_DCtx const* dctx,
-        BYTE const* op, BYTE const* oend,
-        seq_t const seq,
-        BYTE const* prefixStart, BYTE const* virtualStart)
-{
-#if DEBUGLEVEL >= 1
-    size_t const windowSize = dctx->fParams.windowSize;
-    size_t const sequenceSize = seq.litLength + seq.matchLength;
-    BYTE const* const oLitEnd = op + seq.litLength;
-    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
-            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-    assert(op <= oend);
-    assert((size_t)(oend - op) >= sequenceSize);
-    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
-    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
-        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
-        /* Offset must be within the dictionary. */
-        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
-        assert(seq.offset <= windowSize + dictSize);
-    } else {
-        /* Offset must be within our window. */
-        assert(seq.offset <= windowSize);
-    }
-#else
-    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
-#endif
-}
-#endif
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-
-
-FORCE_INLINE_TEMPLATE size_t
-DONT_VECTORIZE
-ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
-                               void* dst, size_t maxDstSize,
-                         const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset,
-                         const int frame)
-{
-    const BYTE* ip = (const BYTE*)seqStart;
-    const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + maxDstSize;
-    BYTE* op = ostart;
-    const BYTE* litPtr = dctx->litPtr;
-    const BYTE* litBufferEnd = dctx->litBufferEnd;
-    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
-    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
-    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
-    (void)frame;
-
-    /* Regen sequences */
-    if (nbSeq) {
-        seqState_t seqState;
-        dctx->fseEntropy = 1;
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        RETURN_ERROR_IF(
-            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
-            corruption_detected, "");
-        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-        assert(dst != NULL);
-
-        ZSTD_STATIC_ASSERT(
-                BIT_DStream_unfinished < BIT_DStream_completed &&
-                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
-                BIT_DStream_completed < BIT_DStream_overflow);
-
-        /* decompress without overrunning litPtr begins */
-        {
-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            /* Align the decompression loop to 32 + 16 bytes.
-                *
-                * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
-                * speed swings based on the alignment of the decompression loop. This
-                * performance swing is caused by parts of the decompression loop falling
-                * out of the DSB. The entire decompression loop should fit in the DSB,
-                * when it can't we get much worse performance. You can measure if you've
-                * hit the good case or the bad case with this perf command for some
-                * compressed file test.zst:
-                *
-                *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
-                *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
-                *
-                * If you see most cycles served out of the MITE you've hit the bad case.
-                * If you see most cycles served out of the DSB you've hit the good case.
-                * If it is pretty even then you may be in an okay case.
-                *
-                * This issue has been reproduced on the following CPUs:
-                *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
-                *               Use Instruments->Counters to get DSB/MITE cycles.
-                *               I never got performance swings, but I was able to
-                *               go from the good case of mostly DSB to half of the
-                *               cycles served from MITE.
-                *   - Coffeelake: Intel i9-9900k
-                *   - Coffeelake: Intel i7-9700k
-                *
-                * I haven't been able to reproduce the instability or DSB misses on any
-                * of the following CPUS:
-                *   - Haswell
-                *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
-                *   - Skylake
-                *
-                * Alignment is done for each of the three major decompression loops:
-                *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
-                *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
-                *   - ZSTD_decompressSequences_body
-                * Alignment choices are made to minimize large swings on bad cases and influence on performance
-                * from changes external to this code, rather than to overoptimize on the current commit.
-                *
-                * If you are seeing performance stability this script can help test.
-                * It tests on 4 commits in zstd where I saw performance change.
-                *
-                *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
-                */
-#if defined(__GNUC__) && defined(__x86_64__)
-            __asm__(".p2align 6");
-#  if __GNUC__ >= 7
-	    /* good for gcc-7, gcc-9, and gcc-11 */
-            __asm__("nop");
-            __asm__(".p2align 5");
-            __asm__("nop");
-            __asm__(".p2align 4");
-#    if __GNUC__ == 8 || __GNUC__ == 10
-	    /* good for gcc-8 and gcc-10 */
-            __asm__("nop");
-            __asm__(".p2align 3");
-#    endif
-#  endif
-#endif
-
-            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
-            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
-                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-#endif
-                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                    return oneSeqSize;
-                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                op += oneSeqSize;
-                if (UNLIKELY(!--nbSeq))
-                    break;
-                BIT_reloadDStream(&(seqState.DStream));
-                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            }
-
-            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
-            if (nbSeq > 0) {
-                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                if (leftoverLit)
-                {
-                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
-                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
-                    sequence.litLength -= leftoverLit;
-                    op += leftoverLit;
-                }
-                litPtr = dctx->litExtraBuffer;
-                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                dctx->litBufferLocation = ZSTD_not_in_dst;
-                {
-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                    assert(!ZSTD_isError(oneSeqSize));
-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-#endif
-                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                        return oneSeqSize;
-                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                    op += oneSeqSize;
-                    if (--nbSeq)
-                        BIT_reloadDStream(&(seqState.DStream));
-                }
-            }
-        }
-
-        if (nbSeq > 0) /* there is remaining lit from extra buffer */
-        {
-
-#if defined(__GNUC__) && defined(__x86_64__)
-            __asm__(".p2align 6");
-            __asm__("nop");
-#  if __GNUC__ != 7
-            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
-            __asm__(".p2align 4");
-            __asm__("nop");
-            __asm__(".p2align 3");
-#  elif __GNUC__ >= 11
-            __asm__(".p2align 3");
-#  else
-            __asm__(".p2align 5");
-            __asm__("nop");
-            __asm__(".p2align 3");
-#  endif
-#endif
-
-            for (; ; ) {
-                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-#endif
-                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                    return oneSeqSize;
-                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                op += oneSeqSize;
-                if (UNLIKELY(!--nbSeq))
-                    break;
-                BIT_reloadDStream(&(seqState.DStream));
-            }
-        }
-
-        /* check if reached exact end */
-        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
-        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
-        /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
-    }
-
-    /* last literal segment */
-    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
-    {
-        size_t const lastLLSize = litBufferEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
-        if (op != NULL) {
-            ZSTD_memmove(op, litPtr, lastLLSize);
-            op += lastLLSize;
-        }
-        litPtr = dctx->litExtraBuffer;
-        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-        dctx->litBufferLocation = ZSTD_not_in_dst;
-    }
-    {   size_t const lastLLSize = litBufferEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
-        if (op != NULL) {
-            ZSTD_memcpy(op, litPtr, lastLLSize);
-            op += lastLLSize;
-        }
-    }
-
-    return op-ostart;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-DONT_VECTORIZE
-ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
-    void* dst, size_t maxDstSize,
-    const void* seqStart, size_t seqSize, int nbSeq,
-    const ZSTD_longOffset_e isLongOffset,
-    const int frame)
-{
-    const BYTE* ip = (const BYTE*)seqStart;
-    const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
-    BYTE* op = ostart;
-    const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litEnd = litPtr + dctx->litSize;
-    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
-    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
-    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
-    DEBUGLOG(5, "ZSTD_decompressSequences_body");
-    (void)frame;
-
-    /* Regen sequences */
-    if (nbSeq) {
-        seqState_t seqState;
-        dctx->fseEntropy = 1;
-        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        RETURN_ERROR_IF(
-            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
-            corruption_detected, "");
-        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-        assert(dst != NULL);
-
-        ZSTD_STATIC_ASSERT(
-            BIT_DStream_unfinished < BIT_DStream_completed &&
-            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
-            BIT_DStream_completed < BIT_DStream_overflow);
-
-#if defined(__GNUC__) && defined(__x86_64__)
-            __asm__(".p2align 6");
-            __asm__("nop");
-#  if __GNUC__ >= 7
-            __asm__(".p2align 5");
-            __asm__("nop");
-            __asm__(".p2align 3");
-#  else
-            __asm__(".p2align 4");
-            __asm__("nop");
-            __asm__(".p2align 3");
-#  endif
-#endif
-
-        for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-            assert(!ZSTD_isError(oneSeqSize));
-            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
-#endif
-            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                return oneSeqSize;
-            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-            op += oneSeqSize;
-            if (UNLIKELY(!--nbSeq))
-                break;
-            BIT_reloadDStream(&(seqState.DStream));
-        }
-
-        /* check if reached exact end */
-        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
-        /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
-    }
-
-    /* last literal segment */
-    {   size_t const lastLLSize = litEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
-        if (op != NULL) {
-            ZSTD_memcpy(op, litPtr, lastLLSize);
-            op += lastLLSize;
-        }
-    }
-
-    return op-ostart;
-}
-
-static size_t
-ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
-                           const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
-{
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-
-static size_t
-ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
-                                               void* dst, size_t maxDstSize,
-                                         const void* seqStart, size_t seqSize, int nbSeq,
-                                         const ZSTD_longOffset_e isLongOffset,
-                                         const int frame)
-{
-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
-                   const BYTE* const prefixStart, const BYTE* const dictEnd)
-{
-    prefetchPos += sequence.litLength;
-    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
-        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
-        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
-    }
-    return prefetchPos + sequence.matchLength;
-}
-
-/* This decoding function employs prefetching
- * to reduce latency impact of cache misses.
- * It's generally employed when block contains a significant portion of long-distance matches
- * or when coupled with a "cold" dictionary */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_decompressSequencesLong_body(
-                               ZSTD_DCtx* dctx,
-                               void* dst, size_t maxDstSize,
-                         const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset,
-                         const int frame)
-{
-    const BYTE* ip = (const BYTE*)seqStart;
-    const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
-    BYTE* op = ostart;
-    const BYTE* litPtr = dctx->litPtr;
-    const BYTE* litBufferEnd = dctx->litBufferEnd;
-    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
-    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
-    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    (void)frame;
-
-    /* Regen sequences */
-    if (nbSeq) {
-#define STORED_SEQS 8
-#define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS STORED_SEQS
-        seq_t sequences[STORED_SEQS];
-        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
-        seqState_t seqState;
-        int seqNb;
-        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
-
-        dctx->fseEntropy = 1;
-        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        assert(dst != NULL);
-        assert(iend >= ip);
-        RETURN_ERROR_IF(
-            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
-            corruption_detected, "");
-        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-        /* prepare in advance */
-        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-            sequences[seqNb] = sequence;
-        }
-        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
-
-        /* decompress without stomping litBuffer */
-        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            size_t oneSeqSize;
-
-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
-            {
-                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
-                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                if (leftoverLit)
-                {
-                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
-                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
-                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
-                    op += leftoverLit;
-                }
-                litPtr = dctx->litExtraBuffer;
-                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                dctx->litBufferLocation = ZSTD_not_in_dst;
-                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
-#endif
-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-
-                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-                sequences[seqNb & STORED_SEQS_MASK] = sequence;
-                op += oneSeqSize;
-            }
-            else
-            {
-                /* lit buffer is either wholly contained in first or second split, or not split at all*/
-                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
-                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
-                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
-#endif
-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-
-                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-                sequences[seqNb & STORED_SEQS_MASK] = sequence;
-                op += oneSeqSize;
-            }
-        }
-        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
-
-        /* finish queue */
-        seqNb -= seqAdvance;
-        for ( ; seqNb<nbSeq ; seqNb++) {
-            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
-            {
-                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                if (leftoverLit)
-                {
-                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
-                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
-                    sequence->litLength -= leftoverLit;
-                    op += leftoverLit;
-                }
-                litPtr = dctx->litExtraBuffer;
-                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-                dctx->litBufferLocation = ZSTD_not_in_dst;
-                {
-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                    assert(!ZSTD_isError(oneSeqSize));
-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
-#endif
-                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-                    op += oneSeqSize;
-                }
-            }
-            else
-            {
-                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
-                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
-                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
-#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
-#endif
-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-                op += oneSeqSize;
-            }
-        }
-
-        /* save reps for next block */
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
-    }
-
-    /* last literal segment */
-    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
-    {
-        size_t const lastLLSize = litBufferEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
-        if (op != NULL) {
-            ZSTD_memmove(op, litPtr, lastLLSize);
-            op += lastLLSize;
-        }
-        litPtr = dctx->litExtraBuffer;
-        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
-    }
-    {   size_t const lastLLSize = litBufferEnd - litPtr;
-        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
-        if (op != NULL) {
-            ZSTD_memmove(op, litPtr, lastLLSize);
-            op += lastLLSize;
-        }
-    }
-
-    return op-ostart;
-}
-
-static size_t
-ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
-                           const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
-{
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
-
-
-
-#if DYNAMIC_BMI2
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-static BMI2_TARGET_ATTRIBUTE size_t
-DONT_VECTORIZE
-ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
-                           const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
-{
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-static BMI2_TARGET_ATTRIBUTE size_t
-DONT_VECTORIZE
-ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
-                           const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
-{
-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-static BMI2_TARGET_ATTRIBUTE size_t
-ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
-                                 void* dst, size_t maxDstSize,
-                           const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
-{
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
-
-#endif /* DYNAMIC_BMI2 */
-
-typedef size_t (*ZSTD_decompressSequences_t)(
-                            ZSTD_DCtx* dctx,
-                            void* dst, size_t maxDstSize,
-                            const void* seqStart, size_t seqSize, int nbSeq,
-                            const ZSTD_longOffset_e isLongOffset,
-                            const int frame);
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-static size_t
-ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
-                   const void* seqStart, size_t seqSize, int nbSeq,
-                   const ZSTD_longOffset_e isLongOffset,
-                   const int frame)
-{
-    DEBUGLOG(5, "ZSTD_decompressSequences");
-#if DYNAMIC_BMI2
-    if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-    }
-#endif
-    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-static size_t
-ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
-                                 const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset,
-                                 const int frame)
-{
-    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
-#if DYNAMIC_BMI2
-    if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-    }
-#endif
-    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
-
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-/* ZSTD_decompressSequencesLong() :
- * decompression function triggered when a minimum share of offsets is considered "long",
- * aka out of cache.
- * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
- * This function will try to mitigate main memory latency through the use of prefetching */
-static size_t
-ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
-                             void* dst, size_t maxDstSize,
-                             const void* seqStart, size_t seqSize, int nbSeq,
-                             const ZSTD_longOffset_e isLongOffset,
-                             const int frame)
-{
-    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
-#if DYNAMIC_BMI2
-    if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-    }
-#endif
-  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
-}
-#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
-
-
-
-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-/* ZSTD_getLongOffsetsShare() :
- * condition : offTable must be valid
- * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
- *           compared to maximum possible of (1<<OffFSELog) */
-static unsigned
-ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
-{
-    const void* ptr = offTable;
-    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
-    const ZSTD_seqSymbol* table = offTable + 1;
-    U32 const max = 1 << tableLog;
-    U32 u, total = 0;
-    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
-
-    assert(max <= (1 << OffFSELog));  /* max not too large */
-    for (u=0; u<max; u++) {
-        if (table[u].nbAdditionalBits > 22) total += 1;
-    }
-
-    assert(tableLog <= OffFSELog);
-    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
-
-    return total;
-}
-#endif
-
-size_t
-ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                              void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
-{   /* blockType == blockCompressed */
-    const BYTE* ip = (const BYTE*)src;
-    /* isLongOffset must be true if there are long offsets.
-     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
-     * We don't expect that to be the case in 64-bit mode.
-     * In block mode, window size is not known, so we have to be conservative.
-     * (note: but it could be evaluated from current-lowLimit)
-     */
-    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
-    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
-
-    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
-
-    /* Decode literals section */
-    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
-        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
-        if (ZSTD_isError(litCSize)) return litCSize;
-        ip += litCSize;
-        srcSize -= litCSize;
-    }
-
-    /* Build Decoding Tables */
-    {
-        /* These macros control at build-time which decompressor implementation
-         * we use. If neither is defined, we do some inspection and dispatch at
-         * runtime.
-         */
-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-        int usePrefetchDecoder = dctx->ddictIsCold;
-#endif
-        int nbSeq;
-        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
-        if (ZSTD_isError(seqHSize)) return seqHSize;
-        ip += seqHSize;
-        srcSize -= seqHSize;
-
-        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
-
-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-        if ( !usePrefetchDecoder
-          && (!frame || (dctx->fParams.windowSize > (1<<24)))
-          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
-            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
-            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
-            usePrefetchDecoder = (shareLongOffsets >= minShare);
-        }
-#endif
-
-        dctx->ddictIsCold = 0;
-
-#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
-    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
-        if (usePrefetchDecoder)
-#endif
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-#endif
-
-#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-        /* else */
-        if (dctx->litBufferLocation == ZSTD_split)
-            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-        else
-            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
-#endif
-    }
-}
-
-
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
-{
-    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
-        dctx->dictEnd = dctx->previousDstEnd;
-        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
-        dctx->prefixStart = dst;
-        dctx->previousDstEnd = dst;
-    }
-}
-
-
-size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
-                            void* dst, size_t dstCapacity,
-                      const void* src, size_t srcSize)
-{
-    size_t dSize;
-    ZSTD_checkContinuity(dctx, dst, dstCapacity);
-    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
-    dctx->previousDstEnd = (char*)dst + dSize;
-    return dSize;
-}
diff --git a/dep/zstd/lib/decompress/zstd_decompress_block.h b/dep/zstd/lib/decompress/zstd_decompress_block.h
deleted file mode 100644
index c61a9d0c4..000000000
--- a/dep/zstd/lib/decompress/zstd_decompress_block.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-#ifndef ZSTD_DEC_BLOCK_H
-#define ZSTD_DEC_BLOCK_H
-
-/*-*******************************************************
- *  Dependencies
- *********************************************************/
-#include "../common/zstd_deps.h"   /* size_t */
-#include "../zstd.h"    /* DCtx, and some public functions */
-#include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
-#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
-
-
-/* ===   Prototypes   === */
-
-/* note: prototypes already published within `zstd.h` :
- * ZSTD_decompressBlock()
- */
-
-/* note: prototypes already published within `zstd_internal.h` :
- * ZSTD_getcBlockSize()
- * ZSTD_decodeSeqHeaders()
- */
-
-
- /* Streaming state is used to inform allocation of the literal buffer */
-typedef enum {
-    not_streaming = 0,
-    is_streaming = 1
-} streaming_operation;
-
-/* ZSTD_decompressBlock_internal() :
- * decompress block, starting at `src`,
- * into destination buffer `dst`.
- * @return : decompressed block size,
- *           or an error code (which can be tested using ZSTD_isError())
- */
-size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
-                               void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
-
-/* ZSTD_buildFSETable() :
- * generate FSE decoding table for one symbol (ll, ml or off)
- * this function must be called with valid parameters only
- * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
- * in which case it cannot fail.
- * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
- * defined in zstd_decompress_internal.h.
- * Internal use only.
- */
-void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
-             const short* normalizedCounter, unsigned maxSymbolValue,
-             const U32* baseValue, const U8* nbAdditionalBits,
-                   unsigned tableLog, void* wksp, size_t wkspSize,
-                   int bmi2);
-
-
-#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/dep/zstd/lib/decompress/zstd_decompress_internal.h b/dep/zstd/lib/decompress/zstd_decompress_internal.h
deleted file mode 100644
index 2b5a53850..000000000
--- a/dep/zstd/lib/decompress/zstd_decompress_internal.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/* zstd_decompress_internal:
- * objects and definitions shared within lib/decompress modules */
-
- #ifndef ZSTD_DECOMPRESS_INTERNAL_H
- #define ZSTD_DECOMPRESS_INTERNAL_H
-
-
-/*-*******************************************************
- *  Dependencies
- *********************************************************/
-#include "../common/mem.h"             /* BYTE, U16, U32 */
-#include "../common/zstd_internal.h"   /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
-
-
-
-/*-*******************************************************
- *  Constants
- *********************************************************/
-static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
-                 0,    1,    2,     3,     4,     5,     6,      7,
-                 8,    9,   10,    11,    12,    13,    14,     15,
-                16,   18,   20,    22,    24,    28,    32,     40,
-                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
-                0x2000, 0x4000, 0x8000, 0x10000 };
-
-static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
-                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
-                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
-                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
-
-static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
-                     0,  1,  2,  3,  4,  5,  6,  7,
-                     8,  9, 10, 11, 12, 13, 14, 15,
-                    16, 17, 18, 19, 20, 21, 22, 23,
-                    24, 25, 26, 27, 28, 29, 30, 31 };
-
-static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
-                     3,  4,  5,    6,     7,     8,     9,    10,
-                    11, 12, 13,   14,    15,    16,    17,    18,
-                    19, 20, 21,   22,    23,    24,    25,    26,
-                    27, 28, 29,   30,    31,    32,    33,    34,
-                    35, 37, 39,   41,    43,    47,    51,    59,
-                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
-                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
-
-/*-*******************************************************
- *  Decompression types
- *********************************************************/
- typedef struct {
-     U32 fastMode;
-     U32 tableLog;
- } ZSTD_seqSymbol_header;
-
- typedef struct {
-     U16  nextState;
-     BYTE nbAdditionalBits;
-     BYTE nbBits;
-     U32  baseValue;
- } ZSTD_seqSymbol;
-
- #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
-
-#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
-#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
-
-typedef struct {
-    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
-    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
-    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
-    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
-    U32 rep[ZSTD_REP_NUM];
-    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
-} ZSTD_entropyDTables_t;
-
-typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
-               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
-               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
-               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
-
-typedef enum { zdss_init=0, zdss_loadHeader,
-               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-typedef enum {
-    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
-    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
-    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
-} ZSTD_dictUses_e;
-
-/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
-typedef struct {
-    const ZSTD_DDict** ddictPtrTable;
-    size_t ddictPtrTableSize;
-    size_t ddictPtrCount;
-} ZSTD_DDictHashSet;
-
-#ifndef ZSTD_DECODER_INTERNAL_BUFFER
-#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
-#endif
-
-#define ZSTD_LBMIN 64
-#define ZSTD_LBMAX (128 << 10)
-
-/* extra buffer, compensates when dst is not large enough to store litBuffer */
-#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
-
-typedef enum {
-    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
-    ZSTD_in_dst = 1,           /* Stored entirely within dst (in memory after current output write) */
-    ZSTD_split = 2            /* Split between litExtraBuffer and dst */
-} ZSTD_litLocation_e;
-
-struct ZSTD_DCtx_s
-{
-    const ZSTD_seqSymbol* LLTptr;
-    const ZSTD_seqSymbol* MLTptr;
-    const ZSTD_seqSymbol* OFTptr;
-    const HUF_DTable* HUFptr;
-    ZSTD_entropyDTables_t entropy;
-    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
-    const void* previousDstEnd;   /* detect continuity */
-    const void* prefixStart;      /* start of current segment */
-    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
-    const void* dictEnd;          /* end of previous segment */
-    size_t expected;
-    ZSTD_frameHeader fParams;
-    U64 processedCSize;
-    U64 decodedSize;
-    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
-    ZSTD_dStage stage;
-    U32 litEntropy;
-    U32 fseEntropy;
-    XXH64_state_t xxhState;
-    size_t headerSize;
-    ZSTD_format_e format;
-    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
-    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
-    const BYTE* litPtr;
-    ZSTD_customMem customMem;
-    size_t litSize;
-    size_t rleSize;
-    size_t staticSize;
-#if DYNAMIC_BMI2 != 0
-    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
-#endif
-
-    /* dictionary */
-    ZSTD_DDict* ddictLocal;
-    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
-    U32 dictID;
-    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
-    ZSTD_dictUses_e dictUses;
-    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
-    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
-
-    /* streaming */
-    ZSTD_dStreamStage streamStage;
-    char*  inBuff;
-    size_t inBuffSize;
-    size_t inPos;
-    size_t maxWindowSize;
-    char*  outBuff;
-    size_t outBuffSize;
-    size_t outStart;
-    size_t outEnd;
-    size_t lhSize;
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
-    void* legacyContext;
-    U32 previousLegacyVersion;
-    U32 legacyVersion;
-#endif
-    U32 hostageByte;
-    int noForwardProgress;
-    ZSTD_bufferMode_e outBufferMode;
-    ZSTD_outBuffer expectedOutBuffer;
-
-    /* workspace */
-    BYTE* litBuffer;
-    const BYTE* litBufferEnd;
-    ZSTD_litLocation_e litBufferLocation;
-    BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
-    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-
-    size_t oversizedDuration;
-
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-    void const* dictContentBeginForFuzzing;
-    void const* dictContentEndForFuzzing;
-#endif
-
-    /* Tracing */
-#if ZSTD_TRACE
-    ZSTD_TraceCtx traceCtx;
-#endif
-};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
-
-MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
-#if DYNAMIC_BMI2 != 0
-	return dctx->bmi2;
-#else
-    (void)dctx;
-	return 0;
-#endif
-}
-
-/*-*******************************************************
- *  Shared internal functions
- *********************************************************/
-
-/*! ZSTD_loadDEntropy() :
- *  dict : must point at beginning of a valid zstd dictionary.
- * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
-size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
-                   const void* const dict, size_t const dictSize);
-
-/*! ZSTD_checkContinuity() :
- *  check if next `dst` follows previous position, where decompression ended.
- *  If yes, do nothing (continue on current segment).
- *  If not, classify previous segment as "external dictionary", and start a new segment.
- *  This function cannot fail. */
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
-
-
-#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/dep/zstd/lib/zdict.h b/dep/zstd/lib/zdict.h
deleted file mode 100644
index f1e139a40..000000000
--- a/dep/zstd/lib/zdict.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef DICTBUILDER_H_001
-#define DICTBUILDER_H_001
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*======  Dependencies  ======*/
-#include <stddef.h>  /* size_t */
-
-
-/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
-#ifndef ZDICTLIB_VISIBILITY
-#  if defined(__GNUC__) && (__GNUC__ >= 4)
-#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
-#  else
-#    define ZDICTLIB_VISIBILITY
-#  endif
-#endif
-#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
-#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
-#endif
-
-/*******************************************************************************
- * Zstd dictionary builder
- *
- * FAQ
- * ===
- * Why should I use a dictionary?
- * ------------------------------
- *
- * Zstd can use dictionaries to improve compression ratio of small data.
- * Traditionally small files don't compress well because there is very little
- * repetition in a single sample, since it is small. But, if you are compressing
- * many similar files, like a bunch of JSON records that share the same
- * structure, you can train a dictionary on ahead of time on some samples of
- * these files. Then, zstd can use the dictionary to find repetitions that are
- * present across samples. This can vastly improve compression ratio.
- *
- * When is a dictionary useful?
- * ----------------------------
- *
- * Dictionaries are useful when compressing many small files that are similar.
- * The larger a file is, the less benefit a dictionary will have. Generally,
- * we don't expect dictionary compression to be effective past 100KB. And the
- * smaller a file is, the more we would expect the dictionary to help.
- *
- * How do I use a dictionary?
- * --------------------------
- *
- * Simply pass the dictionary to the zstd compressor with
- * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
- * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
- * more advanced functions that allow selecting some options, see zstd.h for
- * complete documentation.
- *
- * What is a zstd dictionary?
- * --------------------------
- *
- * A zstd dictionary has two pieces: Its header, and its content. The header
- * contains a magic number, the dictionary ID, and entropy tables. These
- * entropy tables allow zstd to save on header costs in the compressed file,
- * which really matters for small data. The content is just bytes, which are
- * repeated content that is common across many samples.
- *
- * What is a raw content dictionary?
- * ---------------------------------
- *
- * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
- * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
- * content dictionary.
- *
- * How do I train a dictionary?
- * ----------------------------
- *
- * Gather samples from your use case. These samples should be similar to each
- * other. If you have several use cases, you could try to train one dictionary
- * per use case.
- *
- * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
- * dictionary. There are a few advanced versions of this function, but this
- * is a great starting point. If you want to further tune your dictionary
- * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
- * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
- *
- * If the dictionary training function fails, that is likely because you
- * either passed too few samples, or a dictionary would not be effective
- * for your data. Look at the messages that the dictionary trainer printed,
- * if it doesn't say too few samples, then a dictionary would not be effective.
- *
- * How large should my dictionary be?
- * ----------------------------------
- *
- * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
- * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
- * dictionary larger than that. But, most use cases can get away with a
- * smaller dictionary. The advanced dictionary builders can automatically
- * shrink the dictionary for you, and select a the smallest size that
- * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
- * A smaller dictionary can save memory, and potentially speed up
- * compression.
- *
- * How many samples should I provide to the dictionary builder?
- * ------------------------------------------------------------
- *
- * We generally recommend passing ~100x the size of the dictionary
- * in samples. A few thousand should suffice. Having too few samples
- * can hurt the dictionaries effectiveness. Having more samples will
- * only improve the dictionaries effectiveness. But having too many
- * samples can slow down the dictionary builder.
- *
- * How do I determine if a dictionary will be effective?
- * -----------------------------------------------------
- *
- * Simply train a dictionary and try it out. You can use zstd's built in
- * benchmarking tool to test the dictionary effectiveness.
- *
- *   # Benchmark levels 1-3 without a dictionary
- *   zstd -b1e3 -r /path/to/my/files
- *   # Benchmark levels 1-3 with a dictionary
- *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
- *
- * When should I retrain a dictionary?
- * -----------------------------------
- *
- * You should retrain a dictionary when its effectiveness drops. Dictionary
- * effectiveness drops as the data you are compressing changes. Generally, we do
- * expect dictionaries to "decay" over time, as your data changes, but the rate
- * at which they decay depends on your use case. Internally, we regularly
- * retrain dictionaries, and if the new dictionary performs significantly
- * better than the old dictionary, we will ship the new dictionary.
- *
- * I have a raw content dictionary, how do I turn it into a zstd dictionary?
- * -------------------------------------------------------------------------
- *
- * If you have a raw content dictionary, e.g. by manually constructing it, or
- * using a third-party dictionary builder, you can turn it into a zstd
- * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
- * provide some samples of the data. It will add the zstd header to the
- * raw content, which contains a dictionary ID and entropy tables, which
- * will improve compression ratio, and allow zstd to write the dictionary ID
- * into the frame, if you so choose.
- *
- * Do I have to use zstd's dictionary builder?
- * -------------------------------------------
- *
- * No! You can construct dictionary content however you please, it is just
- * bytes. It will always be valid as a raw content dictionary. If you want
- * a zstd dictionary, which can improve compression ratio, use
- * `ZDICT_finalizeDictionary()`.
- *
- * What is the attack surface of a zstd dictionary?
- * ------------------------------------------------
- *
- * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
- * zstd should never crash, or access out-of-bounds memory no matter what
- * the dictionary is. However, if an attacker can control the dictionary
- * during decompression, they can cause zstd to generate arbitrary bytes,
- * just like if they controlled the compressed data.
- *
- ******************************************************************************/
-
-
-/*! ZDICT_trainFromBuffer():
- *  Train a dictionary from an array of samples.
- *  Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
- *  f=20, and accel=1.
- *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- *  The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *  Note:  Dictionary training will fail if there are not enough samples to construct a
- *         dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
- *         If dictionary training fails, you should use zstd without a dictionary, as the dictionary
- *         would've been ineffective anyways. If you believe your samples would benefit from a dictionary
- *         please open an issue with details, and we can look into it.
- *  Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
- *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
-                                    const void* samplesBuffer,
-                                    const size_t* samplesSizes, unsigned nbSamples);
-
-typedef struct {
-    int      compressionLevel;   /*< optimize for a specific zstd compression level; 0 means default */
-    unsigned notificationLevel;  /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value)
-                                  *   NOTE: The zstd format reserves some dictionary IDs for future use.
-                                  *         You may use them in private settings, but be warned that they
-                                  *         may be used by zstd in a public dictionary registry in the future.
-                                  *         These dictionary IDs are:
-                                  *           - low range  : <= 32767
-                                  *           - high range : >= (2^31)
-                                  */
-} ZDICT_params_t;
-
-/*! ZDICT_finalizeDictionary():
- * Given a custom content as a basis for dictionary, and a set of samples,
- * finalize dictionary by adding headers and statistics according to the zstd
- * dictionary format.
- *
- * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each
- * sample in order. The samples are used to construct the statistics, so they
- * should be representative of what you will compress with this dictionary.
- *
- * The compression level can be set in `parameters`. You should pass the
- * compression level you expect to use in production. The statistics for each
- * compression level differ, so tuning the dictionary for the compression level
- * can help quite a bit.
- *
- * You can set an explicit dictionary ID in `parameters`, or allow us to pick
- * a random dictionary ID for you, but we can't guarantee no collisions.
- *
- * The dstDictBuffer and the dictContent may overlap, and the content will be
- * appended to the end of the header. If the header + the content doesn't fit in
- * maxDictSize the beginning of the content is truncated to make room, since it
- * is presumed that the most profitable content is at the end of the dictionary,
- * since that is the cheapest to reference.
- *
- * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
- *
- * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
- *          or an error code, which can be tested by ZDICT_isError().
- * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
- *       instructed to, using notificationLevel>0.
- * NOTE: This function currently may fail in several edge cases including:
- *         * Not enough samples
- *         * Samples are uncompressible
- *         * Samples are all exactly the same
- */
-ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
-                                const void* dictContent, size_t dictContentSize,
-                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                ZDICT_params_t parameters);
-
-
-/*======   Helper functions   ======*/
-ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize);  /**< extracts dictID; @return zero if error (not a valid dictionary) */
-ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize);  /* returns dict header size; returns a ZSTD error code on failure */
-ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
-ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
-
-
-
-#ifdef ZDICT_STATIC_LINKING_ONLY
-
-/* ====================================================================================
- * The definitions in this section are considered experimental.
- * They should never be used with a dynamic library, as they may change in the future.
- * They are provided for advanced usages.
- * Use them only in association with static linking.
- * ==================================================================================== */
-
-#define ZDICT_DICTSIZE_MIN    256
-/* Deprecated: Remove in v1.6.0 */
-#define ZDICT_CONTENTSIZE_MIN 128
-
-/*! ZDICT_cover_params_t:
- *  k and d are the only required parameters.
- *  For others, value 0 means default.
- */
-typedef struct {
-    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
-    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
-    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
-    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
-    double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
-    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
-    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
-    ZDICT_params_t zParams;
-} ZDICT_cover_params_t;
-
-typedef struct {
-    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
-    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
-    unsigned f;                  /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/
-    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
-    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
-    double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
-    unsigned accel;              /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
-    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
-    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
-
-    ZDICT_params_t zParams;
-} ZDICT_fastCover_params_t;
-
-/*! ZDICT_trainFromBuffer_cover():
- *  Train a dictionary from an array of samples using the COVER algorithm.
- *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- *  The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *          See ZDICT_trainFromBuffer() for details on failure modes.
- *  Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
- *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
-          void *dictBuffer, size_t dictBufferCapacity,
-    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
-          ZDICT_cover_params_t parameters);
-
-/*! ZDICT_optimizeTrainFromBuffer_cover():
- * The same requirements as above hold for all the parameters except `parameters`.
- * This function tries many parameter combinations and picks the best parameters.
- * `*parameters` is filled with the best parameters found,
- * dictionary constructed with those parameters is stored in `dictBuffer`.
- *
- * All of the parameters d, k, steps are optional.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
- * if steps is zero it defaults to its default value.
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *          On success `*parameters` contains the parameters selected.
- *          See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
- */
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-          void* dictBuffer, size_t dictBufferCapacity,
-    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-          ZDICT_cover_params_t* parameters);
-
-/*! ZDICT_trainFromBuffer_fastCover():
- *  Train a dictionary from an array of samples using a modified version of COVER algorithm.
- *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- *  d and k are required.
- *  All other parameters are optional, will use default values if not provided
- *  The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *          See ZDICT_trainFromBuffer() for details on failure modes.
- *  Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
- *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
-                    size_t dictBufferCapacity, const void *samplesBuffer,
-                    const size_t *samplesSizes, unsigned nbSamples,
-                    ZDICT_fastCover_params_t parameters);
-
-/*! ZDICT_optimizeTrainFromBuffer_fastCover():
- * The same requirements as above hold for all the parameters except `parameters`.
- * This function tries many parameter combinations (specifically, k and d combinations)
- * and picks the best parameters. `*parameters` is filled with the best parameters found,
- * dictionary constructed with those parameters is stored in `dictBuffer`.
- * All of the parameters d, k, steps, f, and accel are optional.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
- * if steps is zero it defaults to its default value.
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
- * If f is zero, default value of 20 is used.
- * If accel is zero, default value of 1 is used.
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *          On success `*parameters` contains the parameters selected.
- *          See ZDICT_trainFromBuffer() for details on failure modes.
- * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
- */
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
-                    size_t dictBufferCapacity, const void* samplesBuffer,
-                    const size_t* samplesSizes, unsigned nbSamples,
-                    ZDICT_fastCover_params_t* parameters);
-
-typedef struct {
-    unsigned selectivityLevel;   /* 0 means default; larger => select more => larger dictionary */
-    ZDICT_params_t zParams;
-} ZDICT_legacy_params_t;
-
-/*! ZDICT_trainFromBuffer_legacy():
- *  Train a dictionary from an array of samples.
- *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- *  The resulting dictionary will be saved into `dictBuffer`.
- * `parameters` is optional and can be provided with values set to 0 to mean "default".
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *          or an error code, which can be tested with ZDICT_isError().
- *          See ZDICT_trainFromBuffer() for details on failure modes.
- *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
- *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
- *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
- *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
-    void* dictBuffer, size_t dictBufferCapacity,
-    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-    ZDICT_legacy_params_t parameters);
-
-
-/* Deprecation warnings */
-/* It is generally possible to disable deprecation warnings from compiler,
-   for example with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
-#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
-#  define ZDICT_DEPRECATED(message) ZDICTLIB_API   /* disable deprecation warnings */
-#else
-#  define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
-#    define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
-#  elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
-#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
-#  elif (ZDICT_GCC_VERSION >= 301)
-#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
-#  elif defined(_MSC_VER)
-#    define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
-#  else
-#    pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
-#    define ZDICT_DEPRECATED(message) ZDICTLIB_API
-#  endif
-#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
-
-ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
-size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-
-
-#endif   /* ZDICT_STATIC_LINKING_ONLY */
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif   /* DICTBUILDER_H_001 */
diff --git a/dep/zstd/lib/zstd.h b/dep/zstd/lib/zstd.h
deleted file mode 100644
index a88ae7bf8..000000000
--- a/dep/zstd/lib/zstd.h
+++ /dev/null
@@ -1,2575 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#ifndef ZSTD_H_235446
-#define ZSTD_H_235446
-
-/* ======   Dependency   ======*/
-#include <limits.h>   /* INT_MAX */
-#include <stddef.h>   /* size_t */
-
-
-/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
-#ifndef ZSTDLIB_VISIBLE
-#  if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
-#    define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
-#    define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
-#  else
-#    define ZSTDLIB_VISIBLE
-#    define ZSTDLIB_HIDDEN
-#  endif
-#endif
-#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBLE
-#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-#  define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-#  define ZSTDLIB_API ZSTDLIB_VISIBLE
-#endif
-
-
-/*******************************************************************************
-  Introduction
-
-  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
-  real-time compression scenarios at zlib-level and better compression ratios.
-  The zstd compression library provides in-memory compression and decompression
-  functions.
-
-  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
-  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
-  caution, as they require more memory. The library also offers negative
-  compression levels, which extend the range of speed vs. ratio preferences.
-  The lower the level, the faster the speed (at the cost of compression).
-
-  Compression can be done in:
-    - a single step (described as Simple API)
-    - a single step, reusing a context (described as Explicit context)
-    - unbounded multiple steps (described as Streaming compression)
-
-  The compression ratio achievable on small data can be highly improved using
-  a dictionary. Dictionary compression can be performed in:
-    - a single step (described as Simple dictionary API)
-    - a single step, reusing a dictionary (described as Bulk-processing
-      dictionary API)
-
-  Advanced experimental functions can be accessed using
-  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
-
-  Advanced experimental APIs should never be used with a dynamically-linked
-  library. They are not "stable"; their definitions or signatures may change in
-  the future. Only static linking is allowed.
-*******************************************************************************/
-
-/*------   Version   ------*/
-#define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    5
-#define ZSTD_VERSION_RELEASE  2
-#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-
-/*! ZSTD_versionNumber() :
- *  Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
-ZSTDLIB_API unsigned ZSTD_versionNumber(void);
-
-#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
-#define ZSTD_QUOTE(str) #str
-#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
-#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-
-/*! ZSTD_versionString() :
- *  Return runtime library version, like "1.4.5". Requires v1.3.0+. */
-ZSTDLIB_API const char* ZSTD_versionString(void);
-
-/* *************************************
- *  Default constant
- ***************************************/
-#ifndef ZSTD_CLEVEL_DEFAULT
-#  define ZSTD_CLEVEL_DEFAULT 3
-#endif
-
-/* *************************************
- *  Constants
- ***************************************/
-
-/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
-#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
-#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
-#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
-
-#define ZSTD_BLOCKSIZELOG_MAX  17
-#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
-
-
-/***************************************
-*  Simple API
-***************************************/
-/*! ZSTD_compress() :
- *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
- *  @return : compressed size written into `dst` (<= `dstCapacity),
- *            or an error code if it fails (which can be tested using ZSTD_isError()). */
-ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
-                            const void* src, size_t srcSize,
-                                  int compressionLevel);
-
-/*! ZSTD_decompress() :
- *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
- *  `dstCapacity` is an upper bound of originalSize to regenerate.
- *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
- *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
- *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
-ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
-                              const void* src, size_t compressedSize);
-
-/*! ZSTD_getFrameContentSize() : requires v1.3.0+
- *  `src` should point to the start of a ZSTD encoded frame.
- *  `srcSize` must be at least as large as the frame header.
- *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
- *  @return : - decompressed size of `src` frame content, if known
- *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
- *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
- *   note 1 : a 0 return value means the frame is valid but "empty".
- *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
- *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
- *            In which case, it's necessary to use streaming mode to decompress data.
- *            Optionally, application can rely on some implicit limit,
- *            as ZSTD_decompress() only needs an upper bound of decompressed size.
- *            (For example, data could be necessarily cut into blocks <= 16 KB).
- *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
- *            such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
- *   note 4 : decompressed size can be very large (64-bits value),
- *            potentially larger than what local system can handle as a single memory segment.
- *            In which case, it's necessary to use streaming mode to decompress data.
- *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
- *            Always ensure return value fits within application's authorized limits.
- *            Each application can set its own limits.
- *   note 6 : This function replaces ZSTD_getDecompressedSize() */
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
-ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
-
-/*! ZSTD_getDecompressedSize() :
- *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
- *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
- *  "empty", "unknown" and "error" results to the same return value (0),
- *  while ZSTD_getFrameContentSize() gives them separate return values.
- * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
-ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
-
-/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
- * `src` should point to the start of a ZSTD frame or skippable frame.
- * `srcSize` must be >= first frame size
- * @return : the compressed size of the first frame starting at `src`,
- *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
- *        or an error code if input is invalid */
-ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
-
-
-/*======  Helper functions  ======*/
-#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
-ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
-ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
-ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
-ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
-ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
-ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
-
-
-/***************************************
-*  Explicit context
-***************************************/
-/*= Compression context
- *  When compressing many times,
- *  it is recommended to allocate a context just once,
- *  and re-use it for each successive compression operation.
- *  This will make workload friendlier for system's memory.
- *  Note : re-using context is just a speed / resource optimization.
- *         It doesn't change the compression ratio, which remains identical.
- *  Note 2 : In multi-threaded environments,
- *         use one different context per thread for parallel execution.
- */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
-
-/*! ZSTD_compressCCtx() :
- *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- *  Important : in order to behave similarly to `ZSTD_compress()`,
- *  this function compresses at requested compression level,
- *  __ignoring any other parameter__ .
- *  If any advanced parameter was set using the advanced API,
- *  they will all be reset. Only `compressionLevel` remains.
- */
-ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
-                                     void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                                     int compressionLevel);
-
-/*= Decompression context
- *  When decompressing many times,
- *  it is recommended to allocate a context only once,
- *  and re-use it for each successive compression operation.
- *  This will make workload friendlier for system's memory.
- *  Use one context per thread for parallel execution. */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
-
-/*! ZSTD_decompressDCtx() :
- *  Same as ZSTD_decompress(),
- *  requires an allocated ZSTD_DCtx.
- *  Compatible with sticky parameters.
- */
-ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
-                                       void* dst, size_t dstCapacity,
-                                 const void* src, size_t srcSize);
-
-
-/*********************************************
-*  Advanced compression API (Requires v1.4.0+)
-**********************************************/
-
-/* API design :
- *   Parameters are pushed one by one into an existing context,
- *   using ZSTD_CCtx_set*() functions.
- *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
- *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
- *
- *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
- *
- *   This API supersedes all other "advanced" API entry points in the experimental section.
- *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
- */
-
-
-/* Compression strategies, listed from fastest to strongest */
-typedef enum { ZSTD_fast=1,
-               ZSTD_dfast=2,
-               ZSTD_greedy=3,
-               ZSTD_lazy=4,
-               ZSTD_lazy2=5,
-               ZSTD_btlazy2=6,
-               ZSTD_btopt=7,
-               ZSTD_btultra=8,
-               ZSTD_btultra2=9
-               /* note : new strategies _might_ be added in the future.
-                         Only the order (from fast to strong) is guaranteed */
-} ZSTD_strategy;
-
-typedef enum {
-
-    /* compression parameters
-     * Note: When compressing with a ZSTD_CDict these parameters are superseded
-     * by the parameters used to construct the ZSTD_CDict.
-     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
-    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
-                              * Note that exact compression parameters are dynamically determined,
-                              * depending on both compression level and srcSize (when known).
-                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
-                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
-                              * Note 1 : it's possible to pass a negative compression level.
-                              * Note 2 : setting a level does not automatically set all other compression parameters
-                              *   to default. Setting this will however eventually dynamically impact the compression
-                              *   parameters which have not been manually set. The manually set
-                              *   ones will 'stick'. */
-    /* Advanced compression parameters :
-     * It's possible to pin down compression parameters to some specific values.
-     * In which case, these values are no longer dynamically selected by the compressor */
-    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
-                              * This will set a memory budget for streaming decompression,
-                              * with larger values requiring more memory
-                              * and typically compressing more.
-                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
-                              * Special: value 0 means "use default windowLog".
-                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
-                              *       requires explicitly allowing such size at streaming decompression stage. */
-    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
-                              * Resulting memory usage is (1 << (hashLog+2)).
-                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
-                              * Larger tables improve compression ratio of strategies <= dFast,
-                              * and improve speed of strategies > dFast.
-                              * Special: value 0 means "use default hashLog". */
-    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
-                              * Resulting memory usage is (1 << (chainLog+2)).
-                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
-                              * Larger tables result in better and slower compression.
-                              * This parameter is useless for "fast" strategy.
-                              * It's still useful when using "dfast" strategy,
-                              * in which case it defines a secondary probe table.
-                              * Special: value 0 means "use default chainLog". */
-    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
-                              * More attempts result in better and slower compression.
-                              * This parameter is useless for "fast" and "dFast" strategies.
-                              * Special: value 0 means "use default searchLog". */
-    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
-                              * Note that Zstandard can still find matches of smaller size,
-                              * it just tweaks its search algorithm to look for this size and larger.
-                              * Larger values increase compression and decompression speed, but decrease ratio.
-                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
-                              * Note that currently, for all strategies < btopt, effective minimum is 4.
-                              *                    , for all strategies > fast, effective maximum is 6.
-                              * Special: value 0 means "use default minMatchLength". */
-    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
-                              * For strategies btopt, btultra & btultra2:
-                              *     Length of Match considered "good enough" to stop search.
-                              *     Larger values make compression stronger, and slower.
-                              * For strategy fast:
-                              *     Distance between match sampling.
-                              *     Larger values make compression faster, and weaker.
-                              * Special: value 0 means "use default targetLength". */
-    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
-                              * The higher the value of selected strategy, the more complex it is,
-                              * resulting in stronger and slower compression.
-                              * Special: value 0 means "use default strategy". */
-    /* LDM mode parameters */
-    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
-                                     * This parameter is designed to improve compression ratio
-                                     * for large inputs, by finding large matches at long distance.
-                                     * It increases memory usage and window size.
-                                     * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
-                                     * except when expressly set to a different value.
-                                     * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
-                                     * compression strategy >= ZSTD_btopt (== compression level 16+) */
-    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
-                              * Larger values increase memory usage and compression ratio,
-                              * but decrease compression speed.
-                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
-                              * default: windowlog - 7.
-                              * Special: value 0 means "automatically determine hashlog". */
-    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
-                              * Larger/too small values usually decrease compression ratio.
-                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
-                              * Special: value 0 means "use default value" (default: 64). */
-    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
-                              * Larger values improve collision resolution but decrease compression speed.
-                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
-                              * Special: value 0 means "use default value" (default: 3). */
-    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
-                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
-                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
-                              * Larger values improve compression speed.
-                              * Deviating far from default value will likely result in a compression ratio decrease.
-                              * Special: value 0 means "automatically determine hashRateLog". */
-
-    /* frame parameters */
-    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
-                              * Content size must be known at the beginning of compression.
-                              * This is automatically the case when using ZSTD_compress2(),
-                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
-    ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
-    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
-
-    /* multi-threading parameters */
-    /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
-     * Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
-     * In a situation where it's unknown if the linked library supports multi-threading or not,
-     * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
-     */
-    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
-                              * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
-                              * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
-                              * while compression is performed in parallel, within worker thread(s).
-                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
-                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
-                              * More workers improve speed, but also increase memory usage.
-                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
-                              * compression is performed inside Caller's thread, and all invocations are blocking */
-    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
-                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
-                              * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
-                              * The minimum size is automatically and transparently enforced. */
-    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
-                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
-                              * It helps preserve compression ratio, while each job is compressed in parallel.
-                              * This value is enforced only when nbWorkers >= 1.
-                              * Larger values increase compression ratio, but decrease speed.
-                              * Possible values range from 0 to 9 :
-                              * - 0 means "default" : value will be determined by the library, depending on strategy
-                              * - 1 means "no overlap"
-                              * - 9 means "full overlap", using a full window size.
-                              * Each intermediate rank increases/decreases load size by a factor 2 :
-                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
-                              * default value varies between 6 and 9, depending on strategy */
-
-    /* note : additional experimental parameters are also available
-     * within the experimental section of the API.
-     * At the time of this writing, they include :
-     * ZSTD_c_rsyncable
-     * ZSTD_c_format
-     * ZSTD_c_forceMaxWindow
-     * ZSTD_c_forceAttachDict
-     * ZSTD_c_literalCompressionMode
-     * ZSTD_c_targetCBlockSize
-     * ZSTD_c_srcSizeHint
-     * ZSTD_c_enableDedicatedDictSearch
-     * ZSTD_c_stableInBuffer
-     * ZSTD_c_stableOutBuffer
-     * ZSTD_c_blockDelimiters
-     * ZSTD_c_validateSequences
-     * ZSTD_c_useBlockSplitter
-     * ZSTD_c_useRowMatchFinder
-     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
-     * note : never ever use experimentalParam? names directly;
-     *        also, the enums values themselves are unstable and can still change.
-     */
-     ZSTD_c_experimentalParam1=500,
-     ZSTD_c_experimentalParam2=10,
-     ZSTD_c_experimentalParam3=1000,
-     ZSTD_c_experimentalParam4=1001,
-     ZSTD_c_experimentalParam5=1002,
-     ZSTD_c_experimentalParam6=1003,
-     ZSTD_c_experimentalParam7=1004,
-     ZSTD_c_experimentalParam8=1005,
-     ZSTD_c_experimentalParam9=1006,
-     ZSTD_c_experimentalParam10=1007,
-     ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009,
-     ZSTD_c_experimentalParam13=1010,
-     ZSTD_c_experimentalParam14=1011,
-     ZSTD_c_experimentalParam15=1012
-} ZSTD_cParameter;
-
-typedef struct {
-    size_t error;
-    int lowerBound;
-    int upperBound;
-} ZSTD_bounds;
-
-/*! ZSTD_cParam_getBounds() :
- *  All parameters must belong to an interval with lower and upper bounds,
- *  otherwise they will either trigger an error or be automatically clamped.
- * @return : a structure, ZSTD_bounds, which contains
- *         - an error status field, which must be tested using ZSTD_isError()
- *         - lower and upper bounds, both inclusive
- */
-ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
-
-/*! ZSTD_CCtx_setParameter() :
- *  Set one compression parameter, selected by enum ZSTD_cParameter.
- *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
- *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
- *  Setting a parameter is generally only possible during frame initialization (before starting compression).
- *  Exception : when using multi-threading mode (nbWorkers >= 1),
- *              the following parameters can be updated _during_ compression (within same frame):
- *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
- *              new parameters will be active for next job only (after a flush()).
- * @return : an error code (which can be tested using ZSTD_isError()).
- */
-ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
-
-/*! ZSTD_CCtx_setPledgedSrcSize() :
- *  Total input data size to be compressed as a single frame.
- *  Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
- *  This value will also be controlled at end of frame, and trigger an error if not respected.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
- *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
- *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
- *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
- *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
- *  Note 3 : Whenever all input data is provided and consumed in a single round,
- *           for example with ZSTD_compress2(),
- *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
- *           this value is automatically overridden by srcSize instead.
- */
-ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
-
-typedef enum {
-    ZSTD_reset_session_only = 1,
-    ZSTD_reset_parameters = 2,
-    ZSTD_reset_session_and_parameters = 3
-} ZSTD_ResetDirective;
-
-/*! ZSTD_CCtx_reset() :
- *  There are 2 different things that can be reset, independently or jointly :
- *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
- *                  Useful after an error, or to interrupt any ongoing compression.
- *                  Any internal data not yet flushed is cancelled.
- *                  Compression parameters and dictionary remain unchanged.
- *                  They will be used to compress next frame.
- *                  Resetting session never fails.
- *  - The parameters : changes all parameters back to "default".
- *                  This removes any reference to any dictionary too.
- *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
- *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
- *  - Both : similar to resetting the session, followed by resetting parameters.
- */
-ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
-
-/*! ZSTD_compress2() :
- *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
- *  ZSTD_compress2() always starts a new frame.
- *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
- *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
- *  - The function is always blocking, returns when compression is completed.
- *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
- * @return : compressed size written into `dst` (<= `dstCapacity),
- *           or an error code if it fails (which can be tested using ZSTD_isError()).
- */
-ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
-                                   void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize);
-
-
-/***********************************************
-*  Advanced decompression API (Requires v1.4.0+)
-************************************************/
-
-/* The advanced API pushes parameters one by one into an existing DCtx context.
- * Parameters are sticky, and remain valid for all following frames
- * using the same DCtx context.
- * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
- * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
- *        Therefore, no new decompression function is necessary.
- */
-
-typedef enum {
-
-    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
-                              * the streaming API will refuse to allocate memory buffer
-                              * in order to protect the host from unreasonable memory requirements.
-                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
-                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
-                              * Special: value 0 means "use default maximum windowLog". */
-
-    /* note : additional experimental parameters are also available
-     * within the experimental section of the API.
-     * At the time of this writing, they include :
-     * ZSTD_d_format
-     * ZSTD_d_stableOutBuffer
-     * ZSTD_d_forceIgnoreChecksum
-     * ZSTD_d_refMultipleDDicts
-     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
-     * note : never ever use experimentalParam? names directly
-     */
-     ZSTD_d_experimentalParam1=1000,
-     ZSTD_d_experimentalParam2=1001,
-     ZSTD_d_experimentalParam3=1002,
-     ZSTD_d_experimentalParam4=1003
-
-} ZSTD_dParameter;
-
-/*! ZSTD_dParam_getBounds() :
- *  All parameters must belong to an interval with lower and upper bounds,
- *  otherwise they will either trigger an error or be automatically clamped.
- * @return : a structure, ZSTD_bounds, which contains
- *         - an error status field, which must be tested using ZSTD_isError()
- *         - both lower and upper bounds, inclusive
- */
-ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
-
-/*! ZSTD_DCtx_setParameter() :
- *  Set one compression parameter, selected by enum ZSTD_dParameter.
- *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
- *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
- *  Setting a parameter is only possible during frame initialization (before starting decompression).
- * @return : 0, or an error code (which can be tested using ZSTD_isError()).
- */
-ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
-
-/*! ZSTD_DCtx_reset() :
- *  Return a DCtx to clean state.
- *  Session and parameters can be reset jointly or separately.
- *  Parameters can only be reset when no active frame is being decompressed.
- * @return : 0, or an error code, which can be tested with ZSTD_isError()
- */
-ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
-
-
-/****************************
-*  Streaming
-****************************/
-
-typedef struct ZSTD_inBuffer_s {
-  const void* src;    /**< start of input buffer */
-  size_t size;        /**< size of input buffer */
-  size_t pos;         /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
-} ZSTD_inBuffer;
-
-typedef struct ZSTD_outBuffer_s {
-  void*  dst;         /**< start of output buffer */
-  size_t size;        /**< size of output buffer */
-  size_t pos;         /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
-} ZSTD_outBuffer;
-
-
-
-/*-***********************************************************************
-*  Streaming compression - HowTo
-*
-*  A ZSTD_CStream object is required to track streaming operation.
-*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
-*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
-*
-*  For parallel execution, use one separate ZSTD_CStream per thread.
-*
-*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
-*
-*  Parameters are sticky : when starting a new compression on the same context,
-*  it will re-use the same sticky parameters as previous compression session.
-*  When in doubt, it's recommended to fully initialize the context before usage.
-*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
-*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
-*  set more specific parameters, the pledged source size, or load a dictionary.
-*
-*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
-*  consume input stream. The function will automatically update both `pos`
-*  fields within `input` and `output`.
-*  Note that the function may not consume the entire input, for example, because
-*  the output buffer is already full, in which case `input.pos < input.size`.
-*  The caller must check if input has been entirely consumed.
-*  If not, the caller must make some room to receive more compressed data,
-*  and then present again remaining input data.
-*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
-*        but doesn't guarantee maximal forward progress. This is especially relevant
-*        when compressing with multiple threads. The call won't block if it can
-*        consume some input, but if it can't it will wait for some, but not all,
-*        output to be flushed.
-* @return : provides a minimum amount of data remaining to be flushed from internal buffers
-*           or an error code, which can be tested using ZSTD_isError().
-*
-*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
-*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
-*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
-*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
-*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
-*  operation.
-*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
-*        block until the flush is complete or the output buffer is full.
-*  @return : 0 if internal buffers are entirely flushed,
-*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
-*            or an error code, which can be tested using ZSTD_isError().
-*
-*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
-*  It will perform a flush and write frame epilogue.
-*  The epilogue is required for decoders to consider a frame completed.
-*  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
-*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
-*  start a new frame.
-*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
-*        block until the flush is complete or the output buffer is full.
-*  @return : 0 if frame fully completed and fully flushed,
-*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
-*            or an error code, which can be tested using ZSTD_isError().
-*
-* *******************************************************************/
-
-typedef ZSTD_CCtx ZSTD_CStream;  /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
-                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
-/*===== ZSTD_CStream management functions =====*/
-ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
-ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
-
-/*===== Streaming compression functions =====*/
-typedef enum {
-    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
-    ZSTD_e_flush=1,    /* flush any data provided so far,
-                        * it creates (at least) one new block, that can be decoded immediately on reception;
-                        * frame will continue: any future data can still reference previously compressed data, improving compression.
-                        * note : multithreaded compression will block to flush as much output as possible. */
-    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
-                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
-                        * After that point, any additional data starts a new frame.
-                        * note : each frame is independent (does not reference any content from previous frame).
-                        : note : multithreaded compression will block to flush as much output as possible. */
-} ZSTD_EndDirective;
-
-/*! ZSTD_compressStream2() : Requires v1.4.0+
- *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
- *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
- *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
- *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
- *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
- *  - endOp must be a valid directive
- *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
- *  - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
- *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
- *                                                  The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
- *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
- *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
- *            or an error code, which can be tested using ZSTD_isError().
- *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
- *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
- *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
- *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
- *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
- *            Before starting a new compression job, or changing compression parameters,
- *            it is required to fully flush internal buffers.
- */
-ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
-                                         ZSTD_outBuffer* output,
-                                         ZSTD_inBuffer* input,
-                                         ZSTD_EndDirective endOp);
-
-
-/* These buffer sizes are softly recommended.
- * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
- * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
- * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
- *
- * However, note that these recommendations are from the perspective of a C caller program.
- * If the streaming interface is invoked from some other language,
- * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
- * a major performance rule is to reduce crossing such interface to an absolute minimum.
- * It's not rare that performance ends being spent more into the interface, rather than compression itself.
- * In which cases, prefer using large buffers, as large as practical,
- * for both input and output, to reduce the nb of roundtrips.
- */
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
-
-
-/* *****************************************************************************
- * This following is a legacy streaming API, available since v1.0+ .
- * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
- * It is redundant, but remains fully supported.
- * Streaming in combination with advanced parameters and dictionary compression
- * can only be used through the new API.
- ******************************************************************************/
-
-/*!
- * Equivalent to:
- *
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
- *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
- */
-ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
-/*!
- * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
- * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
- * the next read size (if non-zero and not an error). ZSTD_compressStream2()
- * returns the minimum nb of bytes left to flush (if non-zero and not an error).
- */
-ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
-ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
-ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-
-
-/*-***************************************************************************
-*  Streaming decompression - HowTo
-*
-*  A ZSTD_DStream object is required to track streaming operations.
-*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-*  ZSTD_DStream objects can be re-used multiple times.
-*
-*  Use ZSTD_initDStream() to start a new decompression operation.
-* @return : recommended first input size
-*  Alternatively, use advanced API to set specific properties.
-*
-*  Use ZSTD_decompressStream() repetitively to consume your input.
-*  The function will update both `pos` fields.
-*  If `input.pos < input.size`, some input has not been consumed.
-*  It's up to the caller to present again remaining data.
-*  The function tries to flush all data decoded immediately, respecting output buffer size.
-*  If `output.pos < output.size`, decoder has flushed everything it could.
-*  But if `output.pos == output.size`, there might be some data left within internal buffers.,
-*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
-*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
-* @return : 0 when a frame is completely decoded and fully flushed,
-*        or an error code, which can be tested using ZSTD_isError(),
-*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
-*                                the return value is a suggested next input size (just a hint for better latency)
-*                                that will never request more than the remaining frame size.
-* *******************************************************************************/
-
-typedef ZSTD_DCtx ZSTD_DStream;  /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
-                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
-/*===== ZSTD_DStream management functions =====*/
-ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
-ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
-
-/*===== Streaming decompression functions =====*/
-
-/* This function is redundant with the advanced API and equivalent to:
- *
- *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- *     ZSTD_DCtx_refDDict(zds, NULL);
- */
-ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
-
-ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-
-ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
-
-
-/**************************
-*  Simple dictionary API
-***************************/
-/*! ZSTD_compress_usingDict() :
- *  Compression at an explicit compression level using a Dictionary.
- *  A dictionary can be any arbitrary data segment (also called a prefix),
- *  or a buffer with specified information (see zdict.h).
- *  Note : This function loads the dictionary, resulting in significant startup delay.
- *         It's intended for a dictionary used only once.
- *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
-ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
-                                           void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     const void* dict,size_t dictSize,
-                                           int compressionLevel);
-
-/*! ZSTD_decompress_usingDict() :
- *  Decompression using a known Dictionary.
- *  Dictionary must be identical to the one used during compression.
- *  Note : This function loads the dictionary, resulting in significant startup delay.
- *         It's intended for a dictionary used only once.
- *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
-ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                             void* dst, size_t dstCapacity,
-                                       const void* src, size_t srcSize,
-                                       const void* dict,size_t dictSize);
-
-
-/***********************************
- *  Bulk processing dictionary API
- **********************************/
-typedef struct ZSTD_CDict_s ZSTD_CDict;
-
-/*! ZSTD_createCDict() :
- *  When compressing multiple messages or blocks using the same dictionary,
- *  it's recommended to digest the dictionary only once, since it's a costly operation.
- *  ZSTD_createCDict() will create a state from digesting a dictionary.
- *  The resulting state can be used for future compression operations with very limited startup cost.
- *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
- *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
- *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
- *      in which case the only thing that it transports is the @compressionLevel.
- *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
- *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
-                                         int compressionLevel);
-
-/*! ZSTD_freeCDict() :
- *  Function frees memory allocated by ZSTD_createCDict().
- *  If a NULL pointer is passed, no operation is performed. */
-ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-
-/*! ZSTD_compress_usingCDict() :
- *  Compression using a digested Dictionary.
- *  Recommended when same dictionary is used multiple times.
- *  Note : compression level is _decided at dictionary creation time_,
- *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
-                                            void* dst, size_t dstCapacity,
-                                      const void* src, size_t srcSize,
-                                      const ZSTD_CDict* cdict);
-
-
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-
-/*! ZSTD_createDDict() :
- *  Create a digested dictionary, ready to start decompression operation without startup delay.
- *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
-
-/*! ZSTD_freeDDict() :
- *  Function frees memory allocated with ZSTD_createDDict()
- *  If a NULL pointer is passed, no operation is performed. */
-ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-
-/*! ZSTD_decompress_usingDDict() :
- *  Decompression using a digested Dictionary.
- *  Recommended when same dictionary is used multiple times. */
-ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
-                                              void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize,
-                                        const ZSTD_DDict* ddict);
-
-
-/********************************
- *  Dictionary helper functions
- *******************************/
-
-/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
- *  Provides the dictID stored within dictionary.
- *  if @return == 0, the dictionary is not conformant with Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
-
-/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
- *  Provides the dictID of the dictionary loaded into `cdict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
-
-/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
-
-/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
- *  Provides the dictID required to decompressed the frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could for one of the following reasons :
- *  - The frame does not require a dictionary to be decoded (most common case).
- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- *  - This is not a Zstandard frame.
- *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-
-
-/*******************************************************************************
- * Advanced dictionary and prefix API (Requires v1.4.0+)
- *
- * This API allows dictionaries to be used with ZSTD_compress2(),
- * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
- * only reset with the context is reset with ZSTD_reset_parameters or
- * ZSTD_reset_session_and_parameters. Prefixes are single-use.
- ******************************************************************************/
-
-
-/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
- *  Create an internal CDict from `dict` buffer.
- *  Decompression will have to use same dictionary.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
- *           meaning "return to no-dictionary mode".
- *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
- *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
- *  Note 2 : Loading a dictionary involves building tables.
- *           It's also a CPU consuming operation, with non-negligible impact on latency.
- *           Tables are dependent on compression parameters, and for this reason,
- *           compression parameters can no longer be changed after loading a dictionary.
- *  Note 3 :`dict` content will be copied internally.
- *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
- *           In such a case, dictionary buffer must outlive its users.
- *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
- *           to precisely select how dictionary content must be interpreted. */
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-
-/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
- *  Reference a prepared dictionary, to be used for all next compressed frames.
- *  Note that compression parameters are enforced from within CDict,
- *  and supersede any compression parameter previously set within CCtx.
- *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
- *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
- *  The dictionary will remain valid for future compressed frames using same CCtx.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
- *  Note 1 : Currently, only one dictionary can be managed.
- *           Referencing a new dictionary effectively "discards" any previous one.
- *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
-ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
-
-/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
- *  Reference a prefix (single-usage dictionary) for next compressed frame.
- *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
- *  Decompression will need same prefix to properly regenerate data.
- *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
- *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
- *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
- *           Its content must remain unmodified during compression.
- *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
- *           ensure that the window size is large enough to contain the entire source.
- *           See ZSTD_c_windowLog.
- *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
- *           It's a CPU consuming operation, with non-negligible impact on latency.
- *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
- *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
- *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
-ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
-                                 const void* prefix, size_t prefixSize);
-
-/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
- *  Create an internal DDict from dict buffer,
- *  to be used to decompress next frames.
- *  The dictionary remains valid for all future frames, until explicitly invalidated.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
- *            meaning "return to no-dictionary mode".
- *  Note 1 : Loading a dictionary involves building tables,
- *           which has a non-negligible impact on CPU usage and latency.
- *           It's recommended to "load once, use many times", to amortize the cost
- *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
- *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
- *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
- *           how dictionary content is loaded and interpreted.
- */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-
-/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
- *  Reference a prepared dictionary, to be used to decompress next frames.
- *  The dictionary remains active for decompression of future frames using same DCtx.
- *
- *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
- *  will store the DDict references in a table, and the DDict used for decompression
- *  will be determined at decompression time, as per the dict ID in the frame.
- *  The memory for the table is allocated on the first call to refDDict, and can be
- *  freed with ZSTD_freeDCtx().
- *
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Note 1 : Currently, only one dictionary can be managed.
- *           Referencing a new dictionary effectively "discards" any previous one.
- *  Special: referencing a NULL DDict means "return to no-dictionary mode".
- *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
- */
-ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
-
-/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
- *  Reference a prefix (single-usage dictionary) to decompress next frame.
- *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
- *  and must use the same prefix as the one used during compression.
- *  Prefix is **only used once**. Reference is discarded at end of frame.
- *  End of frame is reached when ZSTD_decompressStream() returns 0.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
- *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
- *           Prefix buffer must remain unmodified up to the end of frame,
- *           reached when ZSTD_decompressStream() returns 0.
- *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
- *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
- *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
- *           A full dictionary is more costly, as it requires building tables.
- */
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
-                                 const void* prefix, size_t prefixSize);
-
-/* ===   Memory management   === */
-
-/*! ZSTD_sizeof_*() : Requires v1.4.0+
- *  These functions give the _current_ memory usage of selected object.
- *  Note that object memory usage can evolve (increase or decrease) over time. */
-ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
-ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
-ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
-ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
-ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
-
-#endif  /* ZSTD_H_235446 */
-
-
-/* **************************************************************************************
- *   ADVANCED AND EXPERIMENTAL FUNCTIONS
- ****************************************************************************************
- * The definitions in the following section are considered experimental.
- * They are provided for advanced scenarios.
- * They should never be used with a dynamic library, as prototypes may change in the future.
- * Use them only in association with static linking.
- * ***************************************************************************************/
-
-#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
-#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
-
-/* This can be overridden externally to hide static symbols. */
-#ifndef ZSTDLIB_STATIC_API
-#  if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#    define ZSTDLIB_STATIC_API __declspec(dllexport) ZSTDLIB_VISIBLE
-#  elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-#    define ZSTDLIB_STATIC_API __declspec(dllimport) ZSTDLIB_VISIBLE
-#  else
-#    define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
-#  endif
-#endif
-
-/* Deprecation warnings :
- * Should these warnings be a problem, it is generally possible to disable them,
- * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
- * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
- */
-#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
-#  define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API  /* disable deprecation warnings */
-#else
-#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
-#    define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_STATIC_API
-#  elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message)))
-#  elif defined(__GNUC__) && (__GNUC__ >= 3)
-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated))
-#  elif defined(_MSC_VER)
-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __declspec(deprecated(message))
-#  else
-#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
-#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API
-#  endif
-#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
-
-/****************************************************************************************
- *   experimental API (static linking only)
- ****************************************************************************************
- * The following symbols and constants
- * are not planned to join "stable API" status in the near future.
- * They can still change in future versions.
- * Some of them are planned to remain in the static_only section indefinitely.
- * Some of them might be removed in the future (especially when redundant with existing stable functions)
- * ***************************************************************************************/
-
-#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
-#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
-#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
-#define ZSTD_SKIPPABLEHEADERSIZE    8
-
-/* compression parameter bounds */
-#define ZSTD_WINDOWLOG_MAX_32    30
-#define ZSTD_WINDOWLOG_MAX_64    31
-#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN       10
-#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
-#define ZSTD_HASHLOG_MIN          6
-#define ZSTD_CHAINLOG_MAX_32     29
-#define ZSTD_CHAINLOG_MAX_64     30
-#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
-#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
-#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN        1
-#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
-#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
-#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
-#define ZSTD_STRATEGY_MIN        ZSTD_fast
-#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
-
-
-#define ZSTD_OVERLAPLOG_MIN       0
-#define ZSTD_OVERLAPLOG_MAX       9
-
-#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
-                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
-                                           * to preserve host's memory from unreasonable requirements.
-                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
-                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
-
-
-/* LDM parameter bounds */
-#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
-#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
-#define ZSTD_LDM_MINMATCH_MIN        4
-#define ZSTD_LDM_MINMATCH_MAX     4096
-#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
-#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
-#define ZSTD_LDM_HASHRATELOG_MIN     0
-#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
-
-/* Advanced parameter bounds */
-#define ZSTD_TARGETCBLOCKSIZE_MIN   64
-#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
-#define ZSTD_SRCSIZEHINT_MIN        0
-#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
-
-
-/* ---  Advanced types  --- */
-
-typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
-
-typedef struct {
-    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
-                               * If offset == 0 and matchLength == 0, this sequence represents the last
-                               * literals in the block of litLength size.
-                               */
-
-    unsigned int litLength;   /* Literal length of the sequence. */
-    unsigned int matchLength; /* Match length of the sequence. */
-
-                              /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
-                               * In this case, we will treat the sequence as a marker for a block boundary.
-                               */
-
-    unsigned int rep;         /* Represents which repeat offset is represented by the field 'offset'.
-                               * Ranges from [0, 3].
-                               *
-                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
-                               * recency order. For more detail, see doc/zstd_compression_format.md
-                               *
-                               * If rep == 0, then 'offset' does not contain a repeat offset.
-                               * If rep > 0:
-                               *  If litLength != 0:
-                               *      rep == 1 --> offset == repeat_offset_1
-                               *      rep == 2 --> offset == repeat_offset_2
-                               *      rep == 3 --> offset == repeat_offset_3
-                               *  If litLength == 0:
-                               *      rep == 1 --> offset == repeat_offset_2
-                               *      rep == 2 --> offset == repeat_offset_3
-                               *      rep == 3 --> offset == repeat_offset_1 - 1
-                               *
-                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
-                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
-                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
-                               * use this 'rep' field at all (as of now).
-                               */
-} ZSTD_Sequence;
-
-typedef struct {
-    unsigned windowLog;       /**< largest match distance : larger == more compression, more memory needed during decompression */
-    unsigned chainLog;        /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
-    unsigned hashLog;         /**< dispatch table : larger == faster, more memory */
-    unsigned searchLog;       /**< nb of searches : larger == more compression, slower */
-    unsigned minMatch;        /**< match length searched : larger == faster decompression, sometimes less compression */
-    unsigned targetLength;    /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
-    ZSTD_strategy strategy;   /**< see ZSTD_strategy definition above */
-} ZSTD_compressionParameters;
-
-typedef struct {
-    int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
-    int checksumFlag;    /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
-    int noDictIDFlag;    /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
-} ZSTD_frameParameters;
-
-typedef struct {
-    ZSTD_compressionParameters cParams;
-    ZSTD_frameParameters fParams;
-} ZSTD_parameters;
-
-typedef enum {
-    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
-    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
-    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
-} ZSTD_dictContentType_e;
-
-typedef enum {
-    ZSTD_dlm_byCopy = 0,  /**< Copy dictionary content internally */
-    ZSTD_dlm_byRef = 1    /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
-} ZSTD_dictLoadMethod_e;
-
-typedef enum {
-    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
-    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
-                                 * Useful to save 4 bytes per generated frame.
-                                 * Decoder cannot recognise automatically this format, requiring this instruction. */
-} ZSTD_format_e;
-
-typedef enum {
-    /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
-    ZSTD_d_validateChecksum = 0,
-    ZSTD_d_ignoreChecksum = 1
-} ZSTD_forceIgnoreChecksum_e;
-
-typedef enum {
-    /* Note: this enum controls ZSTD_d_refMultipleDDicts */
-    ZSTD_rmd_refSingleDDict = 0,
-    ZSTD_rmd_refMultipleDDicts = 1
-} ZSTD_refMultipleDDicts_e;
-
-typedef enum {
-    /* Note: this enum and the behavior it controls are effectively internal
-     * implementation details of the compressor. They are expected to continue
-     * to evolve and should be considered only in the context of extremely
-     * advanced performance tuning.
-     *
-     * Zstd currently supports the use of a CDict in three ways:
-     *
-     * - The contents of the CDict can be copied into the working context. This
-     *   means that the compression can search both the dictionary and input
-     *   while operating on a single set of internal tables. This makes
-     *   the compression faster per-byte of input. However, the initial copy of
-     *   the CDict's tables incurs a fixed cost at the beginning of the
-     *   compression. For small compressions (< 8 KB), that copy can dominate
-     *   the cost of the compression.
-     *
-     * - The CDict's tables can be used in-place. In this model, compression is
-     *   slower per input byte, because the compressor has to search two sets of
-     *   tables. However, this model incurs no start-up cost (as long as the
-     *   working context's tables can be reused). For small inputs, this can be
-     *   faster than copying the CDict's tables.
-     *
-     * - The CDict's tables are not used at all, and instead we use the working
-     *   context alone to reload the dictionary and use params based on the source
-     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
-     *   This method is effective when the dictionary sizes are very small relative
-     *   to the input size, and the input size is fairly large to begin with.
-     *
-     * Zstd has a simple internal heuristic that selects which strategy to use
-     * at the beginning of a compression. However, if experimentation shows that
-     * Zstd is making poor choices, it is possible to override that choice with
-     * this enum.
-     */
-    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
-    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
-    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
-    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
-} ZSTD_dictAttachPref_e;
-
-typedef enum {
-  ZSTD_lcm_auto = 0,          /**< Automatically determine the compression mode based on the compression level.
-                               *   Negative compression levels will be uncompressed, and positive compression
-                               *   levels will be compressed. */
-  ZSTD_lcm_huffman = 1,       /**< Always attempt Huffman compression. Uncompressed literals will still be
-                               *   emitted if Huffman compression is not profitable. */
-  ZSTD_lcm_uncompressed = 2   /**< Always emit uncompressed literals. */
-} ZSTD_literalCompressionMode_e;
-
-typedef enum {
-  /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final
-   * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
-   * or ZSTD_ps_disable allow for a force enable/disable the feature.
-   */
-  ZSTD_ps_auto = 0,         /* Let the library automatically determine whether the feature shall be enabled */
-  ZSTD_ps_enable = 1,       /* Force-enable the feature */
-  ZSTD_ps_disable = 2       /* Do not use the feature */
-} ZSTD_paramSwitch_e;
-
-/***************************************
-*  Frame size functions
-***************************************/
-
-/*! ZSTD_findDecompressedSize() :
- *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
- *  `srcSize` must be the _exact_ size of this series
- *       (i.e. there should be a frame boundary at `src + srcSize`)
- *  @return : - decompressed size of all data in all successive frames
- *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
- *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
- *
- *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
- *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
- *            In which case, it's necessary to use streaming mode to decompress data.
- *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
- *   note 3 : decompressed size can be very large (64-bits value),
- *            potentially larger than what local system can handle as a single memory segment.
- *            In which case, it's necessary to use streaming mode to decompress data.
- *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
- *            Always ensure result fits within application's authorized limits.
- *            Each application can set its own limits.
- *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
- *            read each contained frame header.  This is fast as most of the data is skipped,
- *            however it does mean that all frame data must be present and valid. */
-ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
-
-/*! ZSTD_decompressBound() :
- *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
- *  `srcSize` must be the _exact_ size of this series
- *       (i.e. there should be a frame boundary at `src + srcSize`)
- *  @return : - upper-bound for the decompressed size of all data in all successive frames
- *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
- *
- *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
- *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
- *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
- *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
- *              upper-bound = # blocks * min(128 KB, Window_Size)
- */
-ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
-
-/*! ZSTD_frameHeaderSize() :
- *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
- * @return : size of the Frame Header,
- *           or an error code (if srcSize is too small) */
-ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
-
-typedef enum {
-  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
-  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
-} ZSTD_sequenceFormat_e;
-
-/*! ZSTD_generateSequences() :
- * Generate sequences using ZSTD_compress2, given a source buffer.
- *
- * Each block will end with a dummy sequence
- * with offset == 0, matchLength == 0, and litLength == length of last literals.
- * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
- * simply acts as a block delimiter.
- *
- * zc can be used to insert custom compression params.
- * This function invokes ZSTD_compress2
- *
- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
- * @return : number of sequences generated
- */
-
-ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
-                                          size_t outSeqsSize, const void* src, size_t srcSize);
-
-/*! ZSTD_mergeBlockDelimiters() :
- * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
- * by merging them into into the literals of the next sequence.
- *
- * As such, the final generated result has no explicit representation of block boundaries,
- * and the final last literals segment is not represented in the sequences.
- *
- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
- * @return : number of sequences left after merging
- */
-ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
-
-/*! ZSTD_compressSequences() :
- * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
- * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
- * The entire source is compressed into a single frame.
- *
- * The compression behavior changes based on cctx params. In particular:
- *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
- *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
- *    the block size derived from the cctx, and sequences may be split. This is the default setting.
- *
- *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
- *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
- *
- *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
- *    behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
- *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
- *
- *    In addition to the two adjustable experimental params, there are other important cctx params.
- *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
- *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
- *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
- *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
- *
- * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
- * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
- *         and cannot emit an RLE block that disagrees with the repcode history
- * @return : final compressed size or a ZSTD error.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
-                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                                  const void* src, size_t srcSize);
-
-
-/*! ZSTD_writeSkippableFrame() :
- * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
- *
- * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number,
- * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
- * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
- * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
- *
- * Returns an error if destination buffer is not large enough, if the source size is not representable
- * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
- *
- * @return : number of bytes written or a ZSTD error.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
-                                            const void* src, size_t srcSize, unsigned magicVariant);
-
-/*! ZSTD_readSkippableFrame() :
- * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
- *
- * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
- * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
- * in the magicVariant.
- *
- * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
- *
- * @return : number of bytes written or a ZSTD error.
- */
-ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
-                                            const void* src, size_t srcSize);
-
-/*! ZSTD_isSkippableFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
- */
-ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
-
-
-
-/***************************************
-*  Memory management
-***************************************/
-
-/*! ZSTD_estimate*() :
- *  These functions make it possible to estimate memory usage
- *  of a future {D,C}Ctx, before its creation.
- *
- *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
- *  for any compression level up to selected one.
- *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
- *         does not include space for a window buffer.
- *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
- *  The estimate will assume the input may be arbitrarily large,
- *  which is the worst case.
- *
- *  When srcSize can be bound by a known and rather "small" value,
- *  this fact can be used to provide a tighter estimation
- *  because the CCtx compression context will need less memory.
- *  This tighter estimation can be provided by more advanced functions
- *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
- *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
- *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
- *
- *  Note 2 : only single-threaded compression is supported.
- *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
-
-/*! ZSTD_estimateCStreamSize() :
- *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
- *  It will also consider src size to be arbitrarily "large", which is worst case.
- *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
- *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- *  Note : CStream size estimation is only correct for single-threaded compression.
- *  ZSTD_DStream memory budget depends on window Size.
- *  This information can be passed manually, using ZSTD_estimateDStreamSize,
- *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
- *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
- *         an internal ?Dict will be created, which additional size is not estimated here.
- *         In this case, get total size by adding ZSTD_estimate?DictSize */
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
-
-/*! ZSTD_estimate?DictSize() :
- *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
- *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
- *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
-
-/*! ZSTD_initStatic*() :
- *  Initialize an object using a pre-allocated fixed-size buffer.
- *  workspace: The memory area to emplace the object into.
- *             Provided pointer *must be 8-bytes aligned*.
- *             Buffer must outlive object.
- *  workspaceSize: Use ZSTD_estimate*Size() to determine
- *                 how large workspace must be to support target scenario.
- * @return : pointer to object (same address as workspace, just different type),
- *           or NULL if error (size too small, incorrect alignment, etc.)
- *  Note : zstd will never resize nor malloc() when using a static buffer.
- *         If the object requires more memory than available,
- *         zstd will just error out (typically ZSTD_error_memory_allocation).
- *  Note 2 : there is no corresponding "free" function.
- *           Since workspace is allocated externally, it must be freed externally too.
- *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
- *           into its associated cParams.
- *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
- *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
- *  Limitation 2 : static cctx currently not compatible with multi-threading.
- *  Limitation 3 : static dctx is incompatible with legacy support.
- */
-ZSTDLIB_STATIC_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
-ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticCCtx() */
-
-ZSTDLIB_STATIC_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
-ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /**< same as ZSTD_initStaticDCtx() */
-
-ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict(
-                                        void* workspace, size_t workspaceSize,
-                                        const void* dict, size_t dictSize,
-                                        ZSTD_dictLoadMethod_e dictLoadMethod,
-                                        ZSTD_dictContentType_e dictContentType,
-                                        ZSTD_compressionParameters cParams);
-
-ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict(
-                                        void* workspace, size_t workspaceSize,
-                                        const void* dict, size_t dictSize,
-                                        ZSTD_dictLoadMethod_e dictLoadMethod,
-                                        ZSTD_dictContentType_e dictContentType);
-
-
-/*! Custom memory allocation :
- *  These prototypes make it possible to pass your own allocation/free functions.
- *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
- *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
- */
-typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
-typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
-typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
-static
-#ifdef __GNUC__
-__attribute__((__unused__))
-#endif
-ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /**< this constant defers to stdlib's functions */
-
-ZSTDLIB_STATIC_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
-ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_STATIC_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
-ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-
-ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
-                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
-                                                  ZSTD_dictContentType_e dictContentType,
-                                                  ZSTD_compressionParameters cParams,
-                                                  ZSTD_customMem customMem);
-
-/*! Thread pool :
- *  These prototypes make it possible to share a thread pool among multiple compression contexts.
- *  This can limit resources for applications with multiple threads where each one uses
- *  a threaded compression mode (via ZSTD_c_nbWorkers parameter).
- *  ZSTD_createThreadPool creates a new thread pool with a given number of threads.
- *  Note that the lifetime of such pool must exist while being used.
- *  ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
- *  to use an internal thread pool).
- *  ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
- */
-typedef struct POOL_ctx_s ZSTD_threadPool;
-ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
-ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
-
-
-/*
- * This API is temporary and is expected to change or disappear in the future!
- */
-ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2(
-    const void* dict, size_t dictSize,
-    ZSTD_dictLoadMethod_e dictLoadMethod,
-    ZSTD_dictContentType_e dictContentType,
-    const ZSTD_CCtx_params* cctxParams,
-    ZSTD_customMem customMem);
-
-ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced(
-    const void* dict, size_t dictSize,
-    ZSTD_dictLoadMethod_e dictLoadMethod,
-    ZSTD_dictContentType_e dictContentType,
-    ZSTD_customMem customMem);
-
-
-/***************************************
-*  Advanced compression functions
-***************************************/
-
-/*! ZSTD_createCDict_byReference() :
- *  Create a digested dictionary for compression
- *  Dictionary content is just referenced, not duplicated.
- *  As a consequence, `dictBuffer` **must** outlive CDict,
- *  and its content must remain unmodified throughout the lifetime of CDict.
- *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
-ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
-
-/*! ZSTD_getCParams() :
- * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
- * `estimatedSrcSize` value is optional, select 0 if not known */
-ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
-
-/*! ZSTD_getParams() :
- *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
- *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
-ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
-
-/*! ZSTD_checkCParams() :
- *  Ensure param values remain within authorized range.
- * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
-ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
-
-/*! ZSTD_adjustCParams() :
- *  optimize params for a given `srcSize` and `dictSize`.
- * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
- * `dictSize` must be `0` when there is no dictionary.
- *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
- *  This function never fails (wide contract) */
-ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
-
-/*! ZSTD_compress_advanced() :
- *  Note : this function is now DEPRECATED.
- *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
- *  This prototype will generate compilation warnings. */
-ZSTD_DEPRECATED("use ZSTD_compress2")
-size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
-                                          void* dst, size_t dstCapacity,
-                                    const void* src, size_t srcSize,
-                                    const void* dict,size_t dictSize,
-                                          ZSTD_parameters params);
-
-/*! ZSTD_compress_usingCDict_advanced() :
- *  Note : this function is now DEPRECATED.
- *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
- *  This prototype will generate compilation warnings. */
-ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
-size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
-                                              void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize,
-                                        const ZSTD_CDict* cdict,
-                                              ZSTD_frameParameters fParams);
-
-
-/*! ZSTD_CCtx_loadDictionary_byReference() :
- *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
- *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-
-/*! ZSTD_CCtx_loadDictionary_advanced() :
- *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
- *  how to load the dictionary (by copy ? by reference ?)
- *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
-
-/*! ZSTD_CCtx_refPrefix_advanced() :
- *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
- *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
-
-/* ===   experimental parameters   === */
-/* these parameters can be used with ZSTD_setParameter()
- * they are not guaranteed to remain supported in the future */
-
- /* Enables rsyncable mode,
-  * which makes compressed files more rsync friendly
-  * by adding periodic synchronization points to the compressed data.
-  * The target average block size is ZSTD_c_jobSize / 2.
-  * It's possible to modify the job size to increase or decrease
-  * the granularity of the synchronization point.
-  * Once the jobSize is smaller than the window size,
-  * it will result in compression ratio degradation.
-  * NOTE 1: rsyncable mode only works when multithreading is enabled.
-  * NOTE 2: rsyncable performs poorly in combination with long range mode,
-  * since it will decrease the effectiveness of synchronization points,
-  * though mileage may vary.
-  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
-  * If the selected compression level is already running significantly slower,
-  * the overall speed won't be significantly impacted.
-  */
- #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
-
-/* Select a compression format.
- * The value must be of type ZSTD_format_e.
- * See ZSTD_format_e enum definition for details */
-#define ZSTD_c_format ZSTD_c_experimentalParam2
-
-/* Force back-reference distances to remain < windowSize,
- * even when referencing into Dictionary content (default:0) */
-#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
-
-/* Controls whether the contents of a CDict
- * are used in place, or copied into the working context.
- * Accepts values from the ZSTD_dictAttachPref_e enum.
- * See the comments on that enum for an explanation of the feature. */
-#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
-
-/* Controlled with ZSTD_paramSwitch_e enum.
- * Default is ZSTD_ps_auto.
- * Set to ZSTD_ps_disable to never compress literals.
- * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
- * may still be emitted if huffman is not beneficial to use.)
- *
- * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
- * literals compression based on the compression parameters - specifically,
- * negative compression levels do not use literal compression.
- */
-#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
-
-/* Tries to fit compressed block size to be around targetCBlockSize.
- * No target when targetCBlockSize == 0.
- * There is no guarantee on compressed block size (default:0) */
-#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
-
-/* User's best guess of source size.
- * Hint is not valid when srcSizeHint == 0.
- * There is no guarantee that hint is close to actual source size,
- * but compression ratio may regress significantly if guess considerably underestimates */
-#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
-
-/* Controls whether the new and experimental "dedicated dictionary search
- * structure" can be used. This feature is still rough around the edges, be
- * prepared for surprising behavior!
- *
- * How to use it:
- *
- * When using a CDict, whether to use this feature or not is controlled at
- * CDict creation, and it must be set in a CCtxParams set passed into that
- * construction (via ZSTD_createCDict_advanced2()). A compression will then
- * use the feature or not based on how the CDict was constructed; the value of
- * this param, set in the CCtx, will have no effect.
- *
- * However, when a dictionary buffer is passed into a CCtx, such as via
- * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
- * whether the CDict that is created internally can use the feature or not.
- *
- * What it does:
- *
- * Normally, the internal data structures of the CDict are analogous to what
- * would be stored in a CCtx after compressing the contents of a dictionary.
- * To an approximation, a compression using a dictionary can then use those
- * data structures to simply continue what is effectively a streaming
- * compression where the simulated compression of the dictionary left off.
- * Which is to say, the search structures in the CDict are normally the same
- * format as in the CCtx.
- *
- * It is possible to do better, since the CDict is not like a CCtx: the search
- * structures are written once during CDict creation, and then are only read
- * after that, while the search structures in the CCtx are both read and
- * written as the compression goes along. This means we can choose a search
- * structure for the dictionary that is read-optimized.
- *
- * This feature enables the use of that different structure.
- *
- * Note that some of the members of the ZSTD_compressionParameters struct have
- * different semantics and constraints in the dedicated search structure. It is
- * highly recommended that you simply set a compression level in the CCtxParams
- * you pass into the CDict creation call, and avoid messing with the cParams
- * directly.
- *
- * Effects:
- *
- * This will only have any effect when the selected ZSTD_strategy
- * implementation supports this feature. Currently, that's limited to
- * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
- *
- * Note that this means that the CDict tables can no longer be copied into the
- * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
- * usable. The dictionary can only be attached or reloaded.
- *
- * In general, you should expect compression to be faster--sometimes very much
- * so--and CDict creation to be slightly slower. Eventually, we will probably
- * make this mode the default.
- */
-#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
-
-/* ZSTD_c_stableInBuffer
- * Experimental parameter.
- * Default is 0 == disabled. Set to 1 to enable.
- *
- * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
- * between calls, except for the modifications that zstd makes to pos (the
- * caller must not modify pos). This is checked by the compressor, and
- * compression will fail if it ever changes. This means the only flush
- * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
- * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
- * MUST not be modified during compression or you will get data corruption.
- *
- * When this flag is enabled zstd won't allocate an input window buffer,
- * because the user guarantees it can reference the ZSTD_inBuffer until
- * the frame is complete. But, it will still allocate an output buffer
- * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
- * avoid the memcpy() from the input buffer to the input window buffer.
- *
- * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
- * That means this flag cannot be used with ZSTD_compressStream().
- *
- * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
- * this flag is ALWAYS memory safe, and will never access out-of-bounds
- * memory. However, compression WILL fail if you violate the preconditions.
- *
- * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
- * not be modified during compression or you will get data corruption. This
- * is because zstd needs to reference data in the ZSTD_inBuffer to find
- * matches. Normally zstd maintains its own window buffer for this purpose,
- * but passing this flag tells zstd to use the user provided buffer.
- */
-#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
-
-/* ZSTD_c_stableOutBuffer
- * Experimental parameter.
- * Default is 0 == disabled. Set to 1 to enable.
- *
- * Tells he compressor that the ZSTD_outBuffer will not be resized between
- * calls. Specifically: (out.size - out.pos) will never grow. This gives the
- * compressor the freedom to say: If the compressed data doesn't fit in the
- * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
- * always decompress directly into the output buffer, instead of decompressing
- * into an internal buffer and copying to the output buffer.
- *
- * When this flag is enabled zstd won't allocate an output buffer, because
- * it can write directly to the ZSTD_outBuffer. It will still allocate the
- * input window buffer (see ZSTD_c_stableInBuffer).
- *
- * Zstd will check that (out.size - out.pos) never grows and return an error
- * if it does. While not strictly necessary, this should prevent surprises.
- */
-#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
-
-/* ZSTD_c_blockDelimiters
- * Default is 0 == ZSTD_sf_noBlockDelimiters.
- *
- * For use with sequence compression API: ZSTD_compressSequences().
- *
- * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
- * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
- * See the definition of ZSTD_Sequence for more specifics.
- */
-#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
-
-/* ZSTD_c_validateSequences
- * Default is 0 == disabled. Set to 1 to enable sequence validation.
- *
- * For use with sequence compression API: ZSTD_compressSequences().
- * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
- * during function execution.
- *
- * Without validation, providing a sequence that does not conform to the zstd spec will cause
- * undefined behavior, and may produce a corrupted block.
- *
- * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
- * specifics regarding offset/matchlength requirements) then the function will bail out and
- * return an error.
- *
- */
-#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
-
-/* ZSTD_c_useBlockSplitter
- * Controlled with ZSTD_paramSwitch_e enum.
- * Default is ZSTD_ps_auto.
- * Set to ZSTD_ps_disable to never use block splitter.
- * Set to ZSTD_ps_enable to always use block splitter.
- *
- * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
- * block splitting based on the compression parameters.
- */
-#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
-
-/* ZSTD_c_useRowMatchFinder
- * Controlled with ZSTD_paramSwitch_e enum.
- * Default is ZSTD_ps_auto.
- * Set to ZSTD_ps_disable to never use row-based matchfinder.
- * Set to ZSTD_ps_enable to force usage of row-based matchfinder.
- *
- * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
- * the row-based matchfinder based on support for SIMD instructions and the window log.
- * Note that this only pertains to compression strategies: greedy, lazy, and lazy2
- */
-#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
-
-/* ZSTD_c_deterministicRefPrefix
- * Default is 0 == disabled. Set to 1 to enable.
- *
- * Zstd produces different results for prefix compression when the prefix is
- * directly adjacent to the data about to be compressed vs. when it isn't.
- * This is because zstd detects that the two buffers are contiguous and it can
- * use a more efficient match finding algorithm. However, this produces different
- * results than when the two buffers are non-contiguous. This flag forces zstd
- * to always load the prefix in non-contiguous mode, even if it happens to be
- * adjacent to the data, to guarantee determinism.
- *
- * If you really care about determinism when using a dictionary or prefix,
- * like when doing delta compression, you should select this option. It comes
- * at a speed penalty of about ~2.5% if the dictionary and data happened to be
- * contiguous, and is free if they weren't contiguous. We don't expect that
- * intentionally making the dictionary and data contiguous will be worth the
- * cost to memcpy() the data.
- */
-#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
-
-/*! ZSTD_CCtx_getParameter() :
- *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
- *  and store it into int* value.
- * @return : 0, or an error code (which can be tested with ZSTD_isError()).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
-
-
-/*! ZSTD_CCtx_params :
- *  Quick howto :
- *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
- *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
- *                                     an existing ZSTD_CCtx_params structure.
- *                                     This is similar to
- *                                     ZSTD_CCtx_setParameter().
- *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
- *                                    an existing CCtx.
- *                                    These parameters will be applied to
- *                                    all subsequent frames.
- *  - ZSTD_compressStream2() : Do compression using the CCtx.
- *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
- *
- *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
- *  for static allocation of CCtx for single-threaded compression.
- */
-ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
-
-/*! ZSTD_CCtxParams_reset() :
- *  Reset params to default values.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
-
-/*! ZSTD_CCtxParams_init() :
- *  Initializes the compression parameters of cctxParams according to
- *  compression level. All other parameters are reset to their default values.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
-
-/*! ZSTD_CCtxParams_init_advanced() :
- *  Initializes the compression and frame parameters of cctxParams according to
- *  params. All other parameters are reset to their default values.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
-
-/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
- *  Similar to ZSTD_CCtx_setParameter.
- *  Set one compression parameter, selected by enum ZSTD_cParameter.
- *  Parameters must be applied to a ZSTD_CCtx using
- *  ZSTD_CCtx_setParametersUsingCCtxParams().
- * @result : a code representing success or failure (which can be tested with
- *           ZSTD_isError()).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
-
-/*! ZSTD_CCtxParams_getParameter() :
- * Similar to ZSTD_CCtx_getParameter.
- * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
- * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
-
-/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
- *  Apply a set of ZSTD_CCtx_params to the compression context.
- *  This can be done even after compression is started,
- *    if nbWorkers==0, this will have no impact until a new compression is started.
- *    if nbWorkers>=1, new parameters will be picked up at next job,
- *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
-        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
-
-/*! ZSTD_compressStream2_simpleArgs() :
- *  Same as ZSTD_compressStream2(),
- *  but using only integral types as arguments.
- *  This variant might be helpful for binders from dynamic languages
- *  which have troubles handling structures containing memory pointers.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs (
-                            ZSTD_CCtx* cctx,
-                            void* dst, size_t dstCapacity, size_t* dstPos,
-                      const void* src, size_t srcSize, size_t* srcPos,
-                            ZSTD_EndDirective endOp);
-
-
-/***************************************
-*  Advanced decompression functions
-***************************************/
-
-/*! ZSTD_isFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- *  Note 3 : Skippable Frame Identifiers are considered valid. */
-ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
-
-/*! ZSTD_createDDict_byReference() :
- *  Create a digested dictionary, ready to start decompression operation without startup delay.
- *  Dictionary content is referenced, and therefore stays in dictBuffer.
- *  It is important that dictBuffer outlives DDict,
- *  it must remain read accessible throughout the lifetime of DDict */
-ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
-
-/*! ZSTD_DCtx_loadDictionary_byReference() :
- *  Same as ZSTD_DCtx_loadDictionary(),
- *  but references `dict` content instead of copying it into `dctx`.
- *  This saves memory if `dict` remains around.,
- *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
-ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-
-/*! ZSTD_DCtx_loadDictionary_advanced() :
- *  Same as ZSTD_DCtx_loadDictionary(),
- *  but gives direct control over
- *  how to load the dictionary (by copy ? by reference ?)
- *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
-ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
-
-/*! ZSTD_DCtx_refPrefix_advanced() :
- *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
- *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
-ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
-
-/*! ZSTD_DCtx_setMaxWindowSize() :
- *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
- *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
- *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
- *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
- * @return : 0, or an error code (which can be tested using ZSTD_isError()).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
-
-/*! ZSTD_DCtx_getParameter() :
- *  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
- *  and store it into int* value.
- * @return : 0, or an error code (which can be tested with ZSTD_isError()).
- */
-ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
-
-/* ZSTD_d_format
- * experimental parameter,
- * allowing selection between ZSTD_format_e input compression formats
- */
-#define ZSTD_d_format ZSTD_d_experimentalParam1
-/* ZSTD_d_stableOutBuffer
- * Experimental parameter.
- * Default is 0 == disabled. Set to 1 to enable.
- *
- * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
- * between calls, except for the modifications that zstd makes to pos (the
- * caller must not modify pos). This is checked by the decompressor, and
- * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
- * MUST be large enough to fit the entire decompressed frame. This will be
- * checked when the frame content size is known. The data in the ZSTD_outBuffer
- * in the range [dst, dst + pos) MUST not be modified during decompression
- * or you will get data corruption.
- *
- * When this flags is enabled zstd won't allocate an output buffer, because
- * it can write directly to the ZSTD_outBuffer, but it will still allocate
- * an input buffer large enough to fit any compressed block. This will also
- * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
- * If you need to avoid the input buffer allocation use the buffer-less
- * streaming API.
- *
- * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
- * this flag is ALWAYS memory safe, and will never access out-of-bounds
- * memory. However, decompression WILL fail if you violate the preconditions.
- *
- * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
- * not be modified during decompression or you will get data corruption. This
- * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
- * matches. Normally zstd maintains its own buffer for this purpose, but passing
- * this flag tells zstd to use the user provided buffer.
- */
-#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
-
-/* ZSTD_d_forceIgnoreChecksum
- * Experimental parameter.
- * Default is 0 == disabled. Set to 1 to enable
- *
- * Tells the decompressor to skip checksum validation during decompression, regardless
- * of whether checksumming was specified during compression. This offers some
- * slight performance benefits, and may be useful for debugging.
- * Param has values of type ZSTD_forceIgnoreChecksum_e
- */
-#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
-
-/* ZSTD_d_refMultipleDDicts
- * Experimental parameter.
- * Default is 0 == disabled. Set to 1 to enable
- *
- * If enabled and dctx is allocated on the heap, then additional memory will be allocated
- * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
- * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
- * store all references. At decompression time, the appropriate dictID is selected
- * from the set of DDicts based on the dictID in the frame.
- *
- * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
- *
- * Param has values of byte ZSTD_refMultipleDDicts_e
- *
- * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
- * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
- * Memory is allocated as per ZSTD_DCtx::customMem.
- *
- * Although this function allocates memory for the table, the user is still responsible for
- * memory management of the underlying ZSTD_DDict* themselves.
- */
-#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
-
-
-/*! ZSTD_DCtx_setFormat() :
- *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
- *  Instruct the decoder context about what kind of data to decode next.
- *  This instruction is mandatory to decode data without a fully-formed header,
- *  such ZSTD_f_zstd1_magicless for example.
- * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
-ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
-size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
-
-/*! ZSTD_decompressStream_simpleArgs() :
- *  Same as ZSTD_decompressStream(),
- *  but using only integral types as arguments.
- *  This can be helpful for binders from dynamic languages
- *  which have troubles handling structures containing memory pointers.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
-                            ZSTD_DCtx* dctx,
-                            void* dst, size_t dstCapacity, size_t* dstPos,
-                      const void* src, size_t srcSize, size_t* srcPos);
-
-
-/********************************************************************
-*  Advanced streaming functions
-*  Warning : most of these functions are now redundant with the Advanced API.
-*  Once Advanced API reaches "stable" status,
-*  redundant functions will be deprecated, and then at some point removed.
-********************************************************************/
-
-/*=====   Advanced Streaming compression functions  =====*/
-
-/*! ZSTD_initCStream_srcSize() :
- * This function is DEPRECATED, and equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
- *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
- *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- *
- * pledgedSrcSize must be correct. If it is not known at init time, use
- * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
- * "0" also disables frame content size field. It may be enabled in the future.
- * This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
-                         int compressionLevel,
-                         unsigned long long pledgedSrcSize);
-
-/*! ZSTD_initCStream_usingDict() :
- * This function is DEPRECATED, and is equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
- *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
- *
- * Creates of an internal CDict (incompatible with static CCtx), except if
- * dict == NULL or dictSize < 8, in which case no dict is used.
- * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
- * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- * This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
-                     const void* dict, size_t dictSize,
-                           int compressionLevel);
-
-/*! ZSTD_initCStream_advanced() :
- * This function is DEPRECATED, and is approximately equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
- *     for ((param, value) : params) {
- *         ZSTD_CCtx_setParameter(zcs, param, value);
- *     }
- *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
- *
- * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
- * pledgedSrcSize must be correct.
- * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
-                    const void* dict, size_t dictSize,
-                          ZSTD_parameters params,
-                          unsigned long long pledgedSrcSize);
-
-/*! ZSTD_initCStream_usingCDict() :
- * This function is DEPRECATED, and equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_refCDict(zcs, cdict);
- *
- * note : cdict will just be referenced, and must outlive compression session
- * This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
-size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
-
-/*! ZSTD_initCStream_usingCDict_advanced() :
- *   This function is DEPRECATED, and is approximately equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
- *     for ((fParam, value) : fParams) {
- *         ZSTD_CCtx_setParameter(zcs, fParam, value);
- *     }
- *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- *     ZSTD_CCtx_refCDict(zcs, cdict);
- *
- * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
- * pledgedSrcSize must be correct. If srcSize is not known at init time, use
- * value ZSTD_CONTENTSIZE_UNKNOWN.
- * This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
-size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
-                               const ZSTD_CDict* cdict,
-                                     ZSTD_frameParameters fParams,
-                                     unsigned long long pledgedSrcSize);
-
-/*! ZSTD_resetCStream() :
- * This function is DEPRECATED, and is equivalent to:
- *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
- *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
- * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
- *       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
- *       explicitly specified.
- *
- *  start a new frame, using same parameters from previous frame.
- *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
- *  Note that zcs must be init at least once before using ZSTD_resetCStream().
- *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
- *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
- *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
- *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
- * @return : 0, or an error code (which can be tested using ZSTD_isError())
- *  This prototype will generate compilation warnings.
- */
-ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
-size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
-
-
-typedef struct {
-    unsigned long long ingested;   /* nb input bytes read and buffered */
-    unsigned long long consumed;   /* nb input bytes actually compressed */
-    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
-    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
-    unsigned currentJobID;         /* MT only : latest started job nb */
-    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
-} ZSTD_frameProgression;
-
-/* ZSTD_getFrameProgression() :
- * tells how much data has been ingested (read from input)
- * consumed (input actually compressed) and produced (output) for current frame.
- * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
- * Aggregates progression inside active worker threads.
- */
-ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
-
-/*! ZSTD_toFlushNow() :
- *  Tell how many bytes are ready to be flushed immediately.
- *  Useful for multithreading scenarios (nbWorkers >= 1).
- *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
- *  and check its output buffer.
- * @return : amount of data stored in oldest job and ready to be flushed immediately.
- *  if @return == 0, it means either :
- *  + there is no active job (could be checked with ZSTD_frameProgression()), or
- *  + oldest job is still actively compressing data,
- *    but everything it has produced has also been flushed so far,
- *    therefore flush speed is limited by production speed of oldest job
- *    irrespective of the speed of concurrent (and newer) jobs.
- */
-ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
-
-
-/*=====   Advanced Streaming decompression functions  =====*/
-
-/*!
- * This function is deprecated, and is equivalent to:
- *
- *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
- *
- * note: no dictionary will be used if dict == NULL or dictSize < 8
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
- */
-ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
-
-/*!
- * This function is deprecated, and is equivalent to:
- *
- *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- *     ZSTD_DCtx_refDDict(zds, ddict);
- *
- * note : ddict is referenced, it must outlive decompression session
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
- */
-ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
-
-/*!
- * This function is deprecated, and is equivalent to:
- *
- *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
- *
- * re-use decompression parameters from previous init; saves dictionary loading
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
- */
-ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
-
-
-/*********************************************************************
-*  Buffer-less and synchronous inner streaming functions
-*
-*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
-*  But it's also a complex one, with several restrictions, documented below.
-*  Prefer normal streaming API for an easier experience.
-********************************************************************* */
-
-/**
-  Buffer-less streaming compression (synchronous mode)
-
-  A ZSTD_CCtx object is required to track streaming operations.
-  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
-  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
-
-  Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
-  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
-
-  Then, consume your input using ZSTD_compressContinue().
-  There are some important considerations to keep in mind when using this advanced function :
-  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
-  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
-  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
-    Worst case evaluation is provided by ZSTD_compressBound().
-    ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
-  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
-    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks)
-  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
-    In which case, it will "discard" the relevant memory section from its history.
-
-  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
-  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
-  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
-
-  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
-*/
-
-/*=====   Buffer-less streaming compression functions  =====*/
-ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
-ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
-
-ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
-ZSTD_DEPRECATED("use advanced API to access custom parameters")
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
-ZSTD_DEPRECATED("use advanced API to access custom parameters")
-size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
-/**
-  Buffer-less streaming decompression (synchronous mode)
-
-  A ZSTD_DCtx object is required to track streaming operations.
-  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
-  A ZSTD_DCtx object can be re-used multiple times.
-
-  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
-  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
-  Data fragment must be large enough to ensure successful decoding.
- `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
-  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
-           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
-           errorCode, which can be tested using ZSTD_isError().
-
-  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
-  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
-  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
-  As a consequence, check that values remain within valid application range.
-  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
-  Each application can set its own limits, depending on local restrictions.
-  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
-
-  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
-  ZSTD_decompressContinue() is very sensitive to contiguity,
-  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
-  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
-  There are multiple ways to guarantee this condition.
-
-  The most memory efficient way is to use a round buffer of sufficient size.
-  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
-  which can @return an error code if required value is too large for current system (in 32-bits mode).
-  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
-  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
-  which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
-  At which point, decoding can resume from the beginning of the buffer.
-  Note that already decoded data stored in the buffer should be flushed before being overwritten.
-
-  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
-
-  Finally, if you control the compression process, you can also ignore all buffer size rules,
-  as long as the encoder and decoder progress in "lock-step",
-  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
-
-  Once buffers are setup, start decompression, with ZSTD_decompressBegin().
-  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
-
-  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
-  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
-  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
-
- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
-  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
-  It can also be an error code, which can be tested with ZSTD_isError().
-
-  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
-  Context can then be reset to start a new decompression.
-
-  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
-  This information is not required to properly decode a frame.
-
-  == Special case : skippable frames ==
-
-  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
-  Skippable frames will be ignored (skipped) by decompressor.
-  The format of skippable frames is as follows :
-  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
-  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
-  c) Frame Content - any content (User Data) of length equal to Frame Size
-  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
-  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
-*/
-
-/*=====   Buffer-less streaming decompression functions  =====*/
-typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
-typedef struct {
-    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
-    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
-    unsigned blockSizeMax;
-    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
-    unsigned headerSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameHeader;
-
-/*! ZSTD_getFrameHeader() :
- *  decode Frame Header, or requires larger `srcSize`.
- * @return : 0, `zfhPtr` is correctly filled,
- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
- *           or an error code, which can be tested using ZSTD_isError() */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /**< doesn't consume input */
-/*! ZSTD_getFrameHeader_advanced() :
- *  same as ZSTD_getFrameHeader(),
- *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
-ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
-
-ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
-
-ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/* misc */
-ZSTDLIB_STATIC_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
-typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-
-
-
-/* ============================ */
-/**       Block level API       */
-/* ============================ */
-
-/*!
-    Block functions produce and decode raw zstd blocks, without frame metadata.
-    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
-    But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
-
-    A few rules to respect :
-    - Compressing and decompressing require a context structure
-      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
-    - It is necessary to init context before starting
-      + compression : any ZSTD_compressBegin*() variant, including with dictionary
-      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
-      + copyCCtx() and copyDCtx() can be used too
-    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
-      + If input is larger than a block size, it's necessary to split input data into multiple blocks
-      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
-        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
-    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
-      ===> In which case, nothing is produced into `dst` !
-      + User __must__ test for such outcome and deal directly with uncompressed data
-      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
-        Doing so would mess up with statistics history, leading to potential data corruption.
-      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
-      + In case of multiple successive blocks, should some of them be uncompressed,
-        decoder must be informed of their existence in order to follow proper history.
-        Use ZSTD_insertBlock() for such a case.
-*/
-
-/*=====   Raw zstd block functions  =====*/
-ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
-
-
-#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
-
-#if defined (__cplusplus)
-}
-#endif
diff --git a/dep/zstd/lib/zstd_errors.h b/dep/zstd/lib/zstd_errors.h
deleted file mode 100644
index fa3686b77..000000000
--- a/dep/zstd/lib/zstd_errors.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#ifndef ZSTD_ERRORS_H_398273423
-#define ZSTD_ERRORS_H_398273423
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/*===== dependency =====*/
-#include <stddef.h>   /* size_t */
-
-
-/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
-#ifndef ZSTDERRORLIB_VISIBILITY
-#  if defined(__GNUC__) && (__GNUC__ >= 4)
-#    define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
-#  else
-#    define ZSTDERRORLIB_VISIBILITY
-#  endif
-#endif
-#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-#  define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
-#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-#  define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
-#else
-#  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
-#endif
-
-/*-*********************************************
- *  Error codes list
- *-*********************************************
- *  Error codes _values_ are pinned down since v1.3.1 only.
- *  Therefore, don't rely on values if you may link to any version < v1.3.1.
- *
- *  Only values < 100 are considered stable.
- *
- *  note 1 : this API shall be used with static linking only.
- *           dynamic linking is not yet officially supported.
- *  note 2 : Prefer relying on the enum than on its value whenever possible
- *           This is the only supported way to use the error list < v1.3.1
- *  note 3 : ZSTD_isError() is always correct, whatever the library version.
- **********************************************/
-typedef enum {
-  ZSTD_error_no_error = 0,
-  ZSTD_error_GENERIC  = 1,
-  ZSTD_error_prefix_unknown                = 10,
-  ZSTD_error_version_unsupported           = 12,
-  ZSTD_error_frameParameter_unsupported    = 14,
-  ZSTD_error_frameParameter_windowTooLarge = 16,
-  ZSTD_error_corruption_detected = 20,
-  ZSTD_error_checksum_wrong      = 22,
-  ZSTD_error_dictionary_corrupted      = 30,
-  ZSTD_error_dictionary_wrong          = 32,
-  ZSTD_error_dictionaryCreation_failed = 34,
-  ZSTD_error_parameter_unsupported   = 40,
-  ZSTD_error_parameter_outOfBound    = 42,
-  ZSTD_error_tableLog_tooLarge       = 44,
-  ZSTD_error_maxSymbolValue_tooLarge = 46,
-  ZSTD_error_maxSymbolValue_tooSmall = 48,
-  ZSTD_error_stage_wrong       = 60,
-  ZSTD_error_init_missing      = 62,
-  ZSTD_error_memory_allocation = 64,
-  ZSTD_error_workSpace_tooSmall= 66,
-  ZSTD_error_dstSize_tooSmall = 70,
-  ZSTD_error_srcSize_wrong    = 72,
-  ZSTD_error_dstBuffer_null   = 74,
-  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
-  ZSTD_error_frameIndex_tooLarge = 100,
-  ZSTD_error_seekableIO          = 102,
-  ZSTD_error_dstBuffer_wrong     = 104,
-  ZSTD_error_srcBuffer_wrong     = 105,
-  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
-} ZSTD_ErrorCode;
-
-/*! ZSTD_getErrorCode() :
-    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
-    which can be used to compare with enum list published above */
-ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
-ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/dep/zstd/zstd.vcxproj b/dep/zstd/zstd.vcxproj
deleted file mode 100644
index c785949fd..000000000
--- a/dep/zstd/zstd.vcxproj
+++ /dev/null
@@ -1,76 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <Import Project="..\msvc\vsprops\Configurations.props" />
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{73EE0C55-6FFE-44E7-9C12-BAA52434A797}</ProjectGuid>
-  </PropertyGroup>
-  <ItemGroup>
-    <ClInclude Include="lib\common\bitstream.h" />
-    <ClInclude Include="lib\common\compiler.h" />
-    <ClInclude Include="lib\common\cpu.h" />
-    <ClInclude Include="lib\common\debug.h" />
-    <ClInclude Include="lib\common\error_private.h" />
-    <ClInclude Include="lib\common\fse.h" />
-    <ClInclude Include="lib\common\huf.h" />
-    <ClInclude Include="lib\common\mem.h" />
-    <ClInclude Include="lib\common\pool.h" />
-    <ClInclude Include="lib\common\portability_macros.h" />
-    <ClInclude Include="lib\common\threading.h" />
-    <ClInclude Include="lib\common\xxhash.h" />
-    <ClInclude Include="lib\common\zstd_deps.h" />
-    <ClInclude Include="lib\common\zstd_internal.h" />
-    <ClInclude Include="lib\common\zstd_trace.h" />
-    <ClInclude Include="lib\compress\clevels.h" />
-    <ClInclude Include="lib\compress\hist.h" />
-    <ClInclude Include="lib\compress\zstdmt_compress.h" />
-    <ClInclude Include="lib\compress\zstd_compress_internal.h" />
-    <ClInclude Include="lib\compress\zstd_compress_literals.h" />
-    <ClInclude Include="lib\compress\zstd_compress_sequences.h" />
-    <ClInclude Include="lib\compress\zstd_compress_superblock.h" />
-    <ClInclude Include="lib\compress\zstd_cwksp.h" />
-    <ClInclude Include="lib\compress\zstd_double_fast.h" />
-    <ClInclude Include="lib\compress\zstd_fast.h" />
-    <ClInclude Include="lib\compress\zstd_lazy.h" />
-    <ClInclude Include="lib\compress\zstd_ldm.h" />
-    <ClInclude Include="lib\compress\zstd_ldm_geartab.h" />
-    <ClInclude Include="lib\compress\zstd_opt.h" />
-    <ClInclude Include="lib\decompress\zstd_ddict.h" />
-    <ClInclude Include="lib\decompress\zstd_decompress_block.h" />
-    <ClInclude Include="lib\decompress\zstd_decompress_internal.h" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="lib\common\debug.c" />
-    <ClCompile Include="lib\common\entropy_common.c" />
-    <ClCompile Include="lib\common\error_private.c" />
-    <ClCompile Include="lib\common\fse_decompress.c" />
-    <ClCompile Include="lib\common\pool.c" />
-    <ClCompile Include="lib\common\threading.c" />
-    <ClCompile Include="lib\common\xxhash.c" />
-    <ClCompile Include="lib\common\zstd_common.c" />
-    <ClCompile Include="lib\compress\fse_compress.c" />
-    <ClCompile Include="lib\compress\hist.c" />
-    <ClCompile Include="lib\compress\huf_compress.c" />
-    <ClCompile Include="lib\compress\zstdmt_compress.c" />
-    <ClCompile Include="lib\compress\zstd_compress.c" />
-    <ClCompile Include="lib\compress\zstd_compress_literals.c" />
-    <ClCompile Include="lib\compress\zstd_compress_sequences.c" />
-    <ClCompile Include="lib\compress\zstd_compress_superblock.c" />
-    <ClCompile Include="lib\compress\zstd_double_fast.c" />
-    <ClCompile Include="lib\compress\zstd_fast.c" />
-    <ClCompile Include="lib\compress\zstd_lazy.c" />
-    <ClCompile Include="lib\compress\zstd_ldm.c" />
-    <ClCompile Include="lib\compress\zstd_opt.c" />
-    <ClCompile Include="lib\decompress\huf_decompress.c" />
-    <ClCompile Include="lib\decompress\zstd_ddict.c" />
-    <ClCompile Include="lib\decompress\zstd_decompress.c" />
-    <ClCompile Include="lib\decompress\zstd_decompress_block.c" />
-  </ItemGroup>
-  <Import Project="..\msvc\vsprops\StaticLibrary.props" />
-  <ItemDefinitionGroup>
-    <ClCompile>
-      <WarningLevel>TurnOffAllWarnings</WarningLevel>
-      <AdditionalIncludeDirectories>$(ProjectDir)include;$(SolutionDir)dep\zlib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-    </ClCompile>
-  </ItemDefinitionGroup>
-  <Import Project="..\msvc\vsprops\Targets.props" />
-</Project>
\ No newline at end of file
diff --git a/dep/zstd/zstd.vcxproj.filters b/dep/zstd/zstd.vcxproj.filters
deleted file mode 100644
index 349f8109a..000000000
--- a/dep/zstd/zstd.vcxproj.filters
+++ /dev/null
@@ -1,189 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="common">
-      <UniqueIdentifier>{085cb7cf-b82e-4fbc-93f1-28fd067a43be}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="decompress">
-      <UniqueIdentifier>{23a28848-3ccc-47e4-a375-cb97761304c3}</UniqueIdentifier>
-    </Filter>
-    <Filter Include="compress">
-      <UniqueIdentifier>{90d9824f-f178-4f36-9b47-9b2471598977}</UniqueIdentifier>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="lib\common\huf.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\mem.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\pool.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\portability_macros.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\threading.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\xxhash.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\zstd_deps.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\zstd_internal.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\zstd_trace.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\bitstream.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\compiler.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\cpu.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\debug.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\error_private.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\common\fse.h">
-      <Filter>common</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_lazy.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_ldm.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_ldm_geartab.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_opt.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstdmt_compress.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\clevels.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\hist.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_compress_internal.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_compress_literals.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_compress_sequences.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_compress_superblock.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_cwksp.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_double_fast.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\compress\zstd_fast.h">
-      <Filter>compress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\decompress\zstd_ddict.h">
-      <Filter>decompress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\decompress\zstd_decompress_block.h">
-      <Filter>decompress</Filter>
-    </ClInclude>
-    <ClInclude Include="lib\decompress\zstd_decompress_internal.h">
-      <Filter>decompress</Filter>
-    </ClInclude>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="lib\common\pool.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\threading.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\xxhash.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\zstd_common.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\debug.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\entropy_common.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\error_private.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\common\fse_decompress.c">
-      <Filter>common</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_lazy.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_ldm.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_opt.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstdmt_compress.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\fse_compress.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\hist.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\huf_compress.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_compress.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_compress_literals.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_compress_sequences.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_compress_superblock.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_double_fast.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\compress\zstd_fast.c">
-      <Filter>compress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\decompress\zstd_decompress.c">
-      <Filter>decompress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\decompress\zstd_decompress_block.c">
-      <Filter>decompress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\decompress\huf_decompress.c">
-      <Filter>decompress</Filter>
-    </ClCompile>
-    <ClCompile Include="lib\decompress\zstd_ddict.c">
-      <Filter>decompress</Filter>
-    </ClCompile>
-  </ItemGroup>
-</Project>
\ No newline at end of file